author: Devtools Arcadia <arcadia-devtools@yandex-team.ru> 2022-02-07 18:08:42 +0300
committer: Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> 2022-02-07 18:08:42 +0300
commit: 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
tree: e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/actors
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/actors')
-rw-r--r--library/cpp/actors/README.md107
-rw-r--r--library/cpp/actors/core/README.md99
-rw-r--r--library/cpp/actors/core/actor.cpp172
-rw-r--r--library/cpp/actors/core/actor.h530
-rw-r--r--library/cpp/actors/core/actor_bootstrapped.h37
-rw-r--r--library/cpp/actors/core/actor_coroutine.cpp165
-rw-r--r--library/cpp/actors/core/actor_coroutine.h174
-rw-r--r--library/cpp/actors/core/actor_coroutine_ut.cpp141
-rw-r--r--library/cpp/actors/core/actor_ut.cpp578
-rw-r--r--library/cpp/actors/core/actorid.cpp34
-rw-r--r--library/cpp/actors/core/actorid.h196
-rw-r--r--library/cpp/actors/core/actorsystem.cpp277
-rw-r--r--library/cpp/actors/core/actorsystem.h367
-rw-r--r--library/cpp/actors/core/actorsystem_ut.cpp45
-rw-r--r--library/cpp/actors/core/ask.cpp74
-rw-r--r--library/cpp/actors/core/ask.h18
-rw-r--r--library/cpp/actors/core/ask_ut.cpp131
-rw-r--r--library/cpp/actors/core/balancer.cpp293
-rw-r--r--library/cpp/actors/core/balancer.h27
-rw-r--r--library/cpp/actors/core/balancer_ut.cpp225
-rw-r--r--library/cpp/actors/core/buffer.cpp93
-rw-r--r--library/cpp/actors/core/buffer.h62
-rw-r--r--library/cpp/actors/core/callstack.cpp93
-rw-r--r--library/cpp/actors/core/callstack.h58
-rw-r--r--library/cpp/actors/core/config.h239
-rw-r--r--library/cpp/actors/core/cpu_manager.cpp108
-rw-r--r--library/cpp/actors/core/cpu_manager.h57
-rw-r--r--library/cpp/actors/core/cpu_state.h215
-rw-r--r--library/cpp/actors/core/defs.h69
-rw-r--r--library/cpp/actors/core/event.cpp38
-rw-r--r--library/cpp/actors/core/event.h344
-rw-r--r--library/cpp/actors/core/event_load.h112
-rw-r--r--library/cpp/actors/core/event_local.h74
-rw-r--r--library/cpp/actors/core/event_pb.cpp223
-rw-r--r--library/cpp/actors/core/event_pb.h500
-rw-r--r--library/cpp/actors/core/event_pb_payload_ut.cpp154
-rw-r--r--library/cpp/actors/core/event_pb_ut.cpp71
-rw-r--r--library/cpp/actors/core/events.h222
-rw-r--r--library/cpp/actors/core/events_undelivered.cpp60
-rw-r--r--library/cpp/actors/core/executelater.h87
-rw-r--r--library/cpp/actors/core/executor_pool_base.cpp168
-rw-r--r--library/cpp/actors/core/executor_pool_base.h49
-rw-r--r--library/cpp/actors/core/executor_pool_basic.cpp431
-rw-r--r--library/cpp/actors/core/executor_pool_basic.h111
-rw-r--r--library/cpp/actors/core/executor_pool_basic_ut.cpp435
-rw-r--r--library/cpp/actors/core/executor_pool_io.cpp151
-rw-r--r--library/cpp/actors/core/executor_pool_io.h49
-rw-r--r--library/cpp/actors/core/executor_pool_united.cpp1428
-rw-r--r--library/cpp/actors/core/executor_pool_united.h135
-rw-r--r--library/cpp/actors/core/executor_pool_united_ut.cpp338
-rw-r--r--library/cpp/actors/core/executor_thread.cpp563
-rw-r--r--library/cpp/actors/core/executor_thread.h112
-rw-r--r--library/cpp/actors/core/hfunc.h84
-rw-r--r--library/cpp/actors/core/interconnect.cpp170
-rw-r--r--library/cpp/actors/core/interconnect.h248
-rw-r--r--library/cpp/actors/core/invoke.h110
-rw-r--r--library/cpp/actors/core/io_dispatcher.cpp234
-rw-r--r--library/cpp/actors/core/io_dispatcher.h38
-rw-r--r--library/cpp/actors/core/lease.h56
-rw-r--r--library/cpp/actors/core/log.cpp753
-rw-r--r--library/cpp/actors/core/log.h369
-rw-r--r--library/cpp/actors/core/log_iface.h109
-rw-r--r--library/cpp/actors/core/log_settings.cpp230
-rw-r--r--library/cpp/actors/core/log_settings.h176
-rw-r--r--library/cpp/actors/core/log_ut.cpp185
-rw-r--r--library/cpp/actors/core/mailbox.cpp551
-rw-r--r--library/cpp/actors/core/mailbox.h553
-rw-r--r--library/cpp/actors/core/mailbox_queue_revolving.h214
-rw-r--r--library/cpp/actors/core/mailbox_queue_simple.h34
-rw-r--r--library/cpp/actors/core/memory_track.cpp38
-rw-r--r--library/cpp/actors/core/memory_track.h293
-rw-r--r--library/cpp/actors/core/memory_tracker.cpp103
-rw-r--r--library/cpp/actors/core/memory_tracker.h53
-rw-r--r--library/cpp/actors/core/memory_tracker_ut.cpp262
-rw-r--r--library/cpp/actors/core/mon.h234
-rw-r--r--library/cpp/actors/core/mon_stats.h147
-rw-r--r--library/cpp/actors/core/monotonic.cpp23
-rw-r--r--library/cpp/actors/core/monotonic.h111
-rw-r--r--library/cpp/actors/core/probes.cpp28
-rw-r--r--library/cpp/actors/core/probes.h176
-rw-r--r--library/cpp/actors/core/process_stats.cpp303
-rw-r--r--library/cpp/actors/core/process_stats.h66
-rw-r--r--library/cpp/actors/core/scheduler_actor.cpp279
-rw-r--r--library/cpp/actors/core/scheduler_actor.h29
-rw-r--r--library/cpp/actors/core/scheduler_actor_ut.cpp100
-rw-r--r--library/cpp/actors/core/scheduler_basic.cpp274
-rw-r--r--library/cpp/actors/core/scheduler_basic.h81
-rw-r--r--library/cpp/actors/core/scheduler_cookie.cpp84
-rw-r--r--library/cpp/actors/core/scheduler_cookie.h78
-rw-r--r--library/cpp/actors/core/scheduler_queue.h120
-rw-r--r--library/cpp/actors/core/servicemap.h168
-rw-r--r--library/cpp/actors/core/ut/ya.make46
-rw-r--r--library/cpp/actors/core/worker_context.cpp7
-rw-r--r--library/cpp/actors/core/worker_context.h175
-rw-r--r--library/cpp/actors/core/ya.make123
-rw-r--r--library/cpp/actors/dnscachelib/dnscache.cpp445
-rw-r--r--library/cpp/actors/dnscachelib/dnscache.h148
-rw-r--r--library/cpp/actors/dnscachelib/probes.cpp3
-rw-r--r--library/cpp/actors/dnscachelib/probes.h35
-rw-r--r--library/cpp/actors/dnscachelib/timekeeper.h70
-rw-r--r--library/cpp/actors/dnscachelib/ya.make24
-rw-r--r--library/cpp/actors/dnsresolver/dnsresolver.cpp475
-rw-r--r--library/cpp/actors/dnsresolver/dnsresolver.h128
-rw-r--r--library/cpp/actors/dnsresolver/dnsresolver_caching.cpp730
-rw-r--r--library/cpp/actors/dnsresolver/dnsresolver_caching_ut.cpp630
-rw-r--r--library/cpp/actors/dnsresolver/dnsresolver_ondemand.cpp64
-rw-r--r--library/cpp/actors/dnsresolver/dnsresolver_ondemand_ut.cpp24
-rw-r--r--library/cpp/actors/dnsresolver/dnsresolver_ut.cpp98
-rw-r--r--library/cpp/actors/dnsresolver/ut/ya.make20
-rw-r--r--library/cpp/actors/dnsresolver/ya.make20
-rw-r--r--library/cpp/actors/helpers/activeactors.cpp2
-rw-r--r--library/cpp/actors/helpers/activeactors.h42
-rw-r--r--library/cpp/actors/helpers/flow_controlled_queue.cpp215
-rw-r--r--library/cpp/actors/helpers/flow_controlled_queue.h18
-rw-r--r--library/cpp/actors/helpers/future_callback.h33
-rw-r--r--library/cpp/actors/helpers/mon_histogram_helper.h86
-rw-r--r--library/cpp/actors/helpers/pool_stats_collector.h314
-rw-r--r--library/cpp/actors/helpers/selfping_actor.cpp183
-rw-r--r--library/cpp/actors/helpers/selfping_actor.h13
-rw-r--r--library/cpp/actors/helpers/selfping_actor_ut.cpp45
-rw-r--r--library/cpp/actors/helpers/ut/ya.make36
-rw-r--r--library/cpp/actors/helpers/ya.make25
-rw-r--r--library/cpp/actors/http/http.cpp653
-rw-r--r--library/cpp/actors/http/http.h703
-rw-r--r--library/cpp/actors/http/http_cache.cpp599
-rw-r--r--library/cpp/actors/http/http_cache.h27
-rw-r--r--library/cpp/actors/http/http_config.h19
-rw-r--r--library/cpp/actors/http/http_proxy.cpp314
-rw-r--r--library/cpp/actors/http/http_proxy.h239
-rw-r--r--library/cpp/actors/http/http_proxy_acceptor.cpp135
-rw-r--r--library/cpp/actors/http/http_proxy_incoming.cpp302
-rw-r--r--library/cpp/actors/http/http_proxy_outgoing.cpp298
-rw-r--r--library/cpp/actors/http/http_proxy_sock_impl.h262
-rw-r--r--library/cpp/actors/http/http_proxy_ssl.h131
-rw-r--r--library/cpp/actors/http/http_static.cpp95
-rw-r--r--library/cpp/actors/http/http_static.h9
-rw-r--r--library/cpp/actors/http/http_ut.cpp358
-rw-r--r--library/cpp/actors/http/ut/ya.make18
-rw-r--r--library/cpp/actors/http/ya.make33
-rw-r--r--library/cpp/actors/interconnect/channel_scheduler.h120
-rw-r--r--library/cpp/actors/interconnect/event_filter.h72
-rw-r--r--library/cpp/actors/interconnect/event_holder_pool.h128
-rw-r--r--library/cpp/actors/interconnect/events_local.h403
-rw-r--r--library/cpp/actors/interconnect/interconnect.h179
-rw-r--r--library/cpp/actors/interconnect/interconnect_address.cpp94
-rw-r--r--library/cpp/actors/interconnect/interconnect_address.h29
-rw-r--r--library/cpp/actors/interconnect/interconnect_channel.cpp176
-rw-r--r--library/cpp/actors/interconnect/interconnect_channel.h127
-rw-r--r--library/cpp/actors/interconnect/interconnect_common.h106
-rw-r--r--library/cpp/actors/interconnect/interconnect_counters.cpp692
-rw-r--r--library/cpp/actors/interconnect/interconnect_counters.h59
-rw-r--r--library/cpp/actors/interconnect/interconnect_handshake.cpp995
-rw-r--r--library/cpp/actors/interconnect/interconnect_handshake.h24
-rw-r--r--library/cpp/actors/interconnect/interconnect_impl.h45
-rw-r--r--library/cpp/actors/interconnect/interconnect_mon.cpp276
-rw-r--r--library/cpp/actors/interconnect/interconnect_mon.h15
-rw-r--r--library/cpp/actors/interconnect/interconnect_nameserver_base.h83
-rw-r--r--library/cpp/actors/interconnect/interconnect_nameserver_dynamic.cpp178
-rw-r--r--library/cpp/actors/interconnect/interconnect_nameserver_table.cpp86
-rw-r--r--library/cpp/actors/interconnect/interconnect_proxy_wrapper.cpp47
-rw-r--r--library/cpp/actors/interconnect/interconnect_proxy_wrapper.h12
-rw-r--r--library/cpp/actors/interconnect/interconnect_resolve.cpp174
-rw-r--r--library/cpp/actors/interconnect/interconnect_stream.cpp628
-rw-r--r--library/cpp/actors/interconnect/interconnect_stream.h131
-rw-r--r--library/cpp/actors/interconnect/interconnect_tcp_input_session.cpp476
-rw-r--r--library/cpp/actors/interconnect/interconnect_tcp_proxy.cpp936
-rw-r--r--library/cpp/actors/interconnect/interconnect_tcp_proxy.h537
-rw-r--r--library/cpp/actors/interconnect/interconnect_tcp_server.cpp117
-rw-r--r--library/cpp/actors/interconnect/interconnect_tcp_server.h57
-rw-r--r--library/cpp/actors/interconnect/interconnect_tcp_session.cpp1228
-rw-r--r--library/cpp/actors/interconnect/interconnect_tcp_session.h565
-rw-r--r--library/cpp/actors/interconnect/load.cpp405
-rw-r--r--library/cpp/actors/interconnect/load.h24
-rw-r--r--library/cpp/actors/interconnect/logging.h68
-rw-r--r--library/cpp/actors/interconnect/mock/ic_mock.cpp298
-rw-r--r--library/cpp/actors/interconnect/mock/ic_mock.h19
-rw-r--r--library/cpp/actors/interconnect/mock/tsan.supp1
-rw-r--r--library/cpp/actors/interconnect/mock/ya.make16
-rw-r--r--library/cpp/actors/interconnect/packet.cpp32
-rw-r--r--library/cpp/actors/interconnect/packet.h324
-rw-r--r--library/cpp/actors/interconnect/poller.h23
-rw-r--r--library/cpp/actors/interconnect/poller_actor.cpp294
-rw-r--r--library/cpp/actors/interconnect/poller_actor.h63
-rw-r--r--library/cpp/actors/interconnect/poller_actor_darwin.h95
-rw-r--r--library/cpp/actors/interconnect/poller_actor_linux.h114
-rw-r--r--library/cpp/actors/interconnect/poller_actor_win.h103
-rw-r--r--library/cpp/actors/interconnect/poller_tcp.cpp35
-rw-r--r--library/cpp/actors/interconnect/poller_tcp.h25
-rw-r--r--library/cpp/actors/interconnect/poller_tcp_unit.cpp126
-rw-r--r--library/cpp/actors/interconnect/poller_tcp_unit.h67
-rw-r--r--library/cpp/actors/interconnect/poller_tcp_unit_epoll.cpp125
-rw-r--r--library/cpp/actors/interconnect/poller_tcp_unit_epoll.h33
-rw-r--r--library/cpp/actors/interconnect/poller_tcp_unit_select.cpp86
-rw-r--r--library/cpp/actors/interconnect/poller_tcp_unit_select.h19
-rw-r--r--library/cpp/actors/interconnect/profiler.h142
-rw-r--r--library/cpp/actors/interconnect/slowpoke_actor.h47
-rw-r--r--library/cpp/actors/interconnect/types.cpp564
-rw-r--r--library/cpp/actors/interconnect/types.h43
-rw-r--r--library/cpp/actors/interconnect/ut/channel_scheduler_ut.cpp115
-rw-r--r--library/cpp/actors/interconnect/ut/dynamic_proxy_ut.cpp179
-rw-r--r--library/cpp/actors/interconnect/ut/event_holder_pool_ut.cpp59
-rw-r--r--library/cpp/actors/interconnect/ut/interconnect_ut.cpp177
-rw-r--r--library/cpp/actors/interconnect/ut/large.cpp85
-rw-r--r--library/cpp/actors/interconnect/ut/lib/ic_test_cluster.h84
-rw-r--r--library/cpp/actors/interconnect/ut/lib/interrupter.h249
-rw-r--r--library/cpp/actors/interconnect/ut/lib/node.h137
-rw-r--r--library/cpp/actors/interconnect/ut/lib/test_actors.h83
-rw-r--r--library/cpp/actors/interconnect/ut/lib/test_events.h49
-rw-r--r--library/cpp/actors/interconnect/ut/lib/ya.make12
-rw-r--r--library/cpp/actors/interconnect/ut/poller_actor_ut.cpp264
-rw-r--r--library/cpp/actors/interconnect/ut/protos/interconnect_test.proto25
-rw-r--r--library/cpp/actors/interconnect/ut/protos/ya.make11
-rw-r--r--library/cpp/actors/interconnect/ut/ya.make36
-rw-r--r--library/cpp/actors/interconnect/ut_fat/main.cpp133
-rw-r--r--library/cpp/actors/interconnect/ut_fat/ya.make25
-rw-r--r--library/cpp/actors/interconnect/watchdog_timer.h68
-rw-r--r--library/cpp/actors/interconnect/ya.make94
-rw-r--r--library/cpp/actors/memory_log/memlog.cpp367
-rw-r--r--library/cpp/actors/memory_log/memlog.h211
-rw-r--r--library/cpp/actors/memory_log/mmap.cpp63
-rw-r--r--library/cpp/actors/memory_log/ya.make19
-rw-r--r--library/cpp/actors/prof/tag.cpp119
-rw-r--r--library/cpp/actors/prof/tag.h73
-rw-r--r--library/cpp/actors/prof/tcmalloc.cpp32
-rw-r--r--library/cpp/actors/prof/tcmalloc.h9
-rw-r--r--library/cpp/actors/prof/tcmalloc_null.cpp10
-rw-r--r--library/cpp/actors/prof/ut/tag_ut.cpp68
-rw-r--r--library/cpp/actors/prof/ut/ya.make12
-rw-r--r--library/cpp/actors/prof/ya.make33
-rw-r--r--library/cpp/actors/protos/actors.proto13
-rw-r--r--library/cpp/actors/protos/interconnect.proto113
-rw-r--r--library/cpp/actors/protos/services_common.proto21
-rw-r--r--library/cpp/actors/protos/unittests.proto20
-rw-r--r--library/cpp/actors/protos/ya.make14
-rw-r--r--library/cpp/actors/testlib/decorator_ut.cpp327
-rw-r--r--library/cpp/actors/testlib/test_runtime.cpp1902
-rw-r--r--library/cpp/actors/testlib/test_runtime.h716
-rw-r--r--library/cpp/actors/testlib/ut/ya.make20
-rw-r--r--library/cpp/actors/testlib/ya.make27
-rw-r--r--library/cpp/actors/util/affinity.cpp93
-rw-r--r--library/cpp/actors/util/affinity.h49
-rw-r--r--library/cpp/actors/util/cpumask.h133
-rw-r--r--library/cpp/actors/util/datetime.h82
-rw-r--r--library/cpp/actors/util/defs.h16
-rw-r--r--library/cpp/actors/util/funnel_queue.h240
-rw-r--r--library/cpp/actors/util/futex.h13
-rw-r--r--library/cpp/actors/util/intrinsics.h97
-rw-r--r--library/cpp/actors/util/local_process_key.h132
-rw-r--r--library/cpp/actors/util/named_tuple.h30
-rw-r--r--library/cpp/actors/util/queue_chunk.h29
-rw-r--r--library/cpp/actors/util/queue_oneone_inplace.h118
-rw-r--r--library/cpp/actors/util/recentwnd.h67
-rw-r--r--library/cpp/actors/util/rope.h1161
-rw-r--r--library/cpp/actors/util/rope_cont_deque.h181
-rw-r--r--library/cpp/actors/util/rope_cont_list.h159
-rw-r--r--library/cpp/actors/util/rope_ut.cpp231
-rw-r--r--library/cpp/actors/util/should_continue.cpp23
-rw-r--r--library/cpp/actors/util/should_continue.h22
-rw-r--r--library/cpp/actors/util/thread.h26
-rw-r--r--library/cpp/actors/util/threadparkpad.cpp148
-rw-r--r--library/cpp/actors/util/threadparkpad.h21
-rw-r--r--library/cpp/actors/util/ticket_lock.h48
-rw-r--r--library/cpp/actors/util/timerfd.h65
-rw-r--r--library/cpp/actors/util/unordered_cache.h201
-rw-r--r--library/cpp/actors/util/unordered_cache_ut.cpp138
-rw-r--r--library/cpp/actors/util/ut/ya.make18
-rw-r--r--library/cpp/actors/util/ya.make37
-rw-r--r--library/cpp/actors/wilson/wilson_event.h181
-rw-r--r--library/cpp/actors/wilson/wilson_trace.h161
-rw-r--r--library/cpp/actors/wilson/ya.make14
-rw-r--r--library/cpp/actors/ya.make16
271 files changed, 48185 insertions, 0 deletions
diff --git a/library/cpp/actors/README.md b/library/cpp/actors/README.md
new file mode 100644
index 0000000000..c39908f2f5
--- /dev/null
+++ b/library/cpp/actors/README.md
@@ -0,0 +1,107 @@
+## Actor library
+
+### Part one: introduction.
+Sometimes you have to build asynchronous, substantially parallel, partly distributed programs. Sometimes the internal logic is also non-trivial and heterogeneous, written by several teams over many years. Everything just the way we like it. Humanity has come up with only so many ways to organize the internal structure and code of such programs. Most of them are bad (and it is precisely the bad approaches that gave the development of asynchronous, multithreaded programs its bad reputation). Some are better. And, as usual, there is no silver bullet.
+
+When we started developing Yandex Database (back then still KiKiMR), it was clear from the start that simple improvised solutions would not do (at least not if we wanted to do it well, without being ashamed of the result). As the foundation we chose message passing and the actor model. And we have not regretted it. Over time this approach spread to neighboring projects.
+
+### Basic concepts.
+Stripped of the husk: we model a service (or a program, when running as a standalone binary) as an ensemble of independent agents that interact by sending asynchronous messages within a shared environment. Every word here matters:
+
+Independent – they share neither state nor flow of control.
+Message passing – we formalize protocols, not interfaces.
+
+Asynchronous – we do not block when sending messages.
+Shared environment – all agents share a common pool of resources, and each of them, knowing an address, can send a message to any other.
+
+In more hyped-up terms, this is very much like co-located microservices, just one level lower. And yes, we deliberately chose not to hide asynchrony and parallelism from the developer; we show the raw meat.
+
+### IActor.
+https://a.yandex-team.ru/arc/trunk/arcadia/library/actors/core/actor.h?rev=5315854#L105
+The base class of all agents; it is normally not used directly. Either TActor or TActorBootstrapped is instantiated instead. Practically all of a program's useful code lives in actors.
+(An important note: in the code you will see methods with and without a TActorContext parameter that are similar in name and purpose. The TActorContext variants are now deprecated; new code should be written without them.)
+Important methods:
+
+PassAway – the only correct way for a registered actor to die. May only be called from inside a message handler.
+Send – send a message to a known recipient address. The actor provides a helper that accepts the message directly; the base call, which accepts a full event handle, is available in the context.
+
+Become – set the message handler function that will be used for the next incoming message.
+
+Register – register a new actor in the actor system, allocating a fresh mailbox for it. Important: from the moment of the call, ownership of the actor is transferred to the actor system, i.e. by the time the call returns the actor may already be running on another thread; you must neither call it directly nor even assume that it is still alive.
+
+Schedule – register a message that will be sent after no less than the requested delay. The actor provides a helper that wraps the message in a handle addressed to itself; in the context you can pass a full handle.
+
+SelfId – obtain your own address. The returned TActorIdentity object can be passed around when you need to delegate sending messages on behalf of the actor (for example, when you write useful code using passive objects).
+Sending messages is cheap, so there is no need to economize on it excessively (but it is not free either, so sending messages merely for the sake of sending messages is not a good idea).
+
+Instantiating actors is cheap as well; an actor per request or per request phase is perfectly normal practice. Multiplexing the processing of different requests in a single actor is just as normal. Our code has plenty of examples of both. Use common sense and your own taste.
+Since an actor occupies a thread from the actor system's pool for the duration of message processing, long computations are better moved out to a separate, dedicated actor (either relocated to a separate actor system pool, or with parallelism controlled by a resource broker); making blocking calls is almost always a mistake. The urge to write a mutex is heresy and the work of the devil.
+Actors are identified by their TActorID, which is unique; you must not invent one out of thin air, only obtain it from registration (for spawned actors) or learn it from someone who legitimately knows it.
+
+Sending to a non-existent (already dead) actor is safe; the message is simply dropped at processing time (how to handle undelivered messages in protocols is described below).
+
+Besides normal TActorIDs there are also service IDs (composed of a string and a node number). A real actor can be registered under such an ID, and when a message arrives at the service address, it is effectively forwarded to the current actual actor. This lets well-known services live at well-known addresses without building a parallel discovery machinery.
+
+Whether to build a state machine out of an actor by switching the handler function is a per-case choice: sometimes that is more convenient, sometimes it is better to lump everything into one state, and sometimes a hybrid solution works best (where part of the life cycle – usually initialization and shutdown – is expressed as transitions, and the rest is not).
+Fewer words, more action – this is already enough to read the simplest example. https://a.yandex-team.ru/arc/trunk/arcadia/library/actors/examples/01_ping_pong
+Here you can see a sample of the simplest actor, one that bounces messages back and forth and uses all the basic calls. By turning the various knobs (the number of threads in the thread pool, the number of actor pairs exchanging messages) you can watch how the system's behavior changes (hint: in such simple scenarios peak performance is reached with a single thread per thread pool).
+
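+Before moving on to events, here is a compact illustrative sketch (not part of the library; the class name and the one-second delay are made up) that uses Become, Schedule and PassAway together:
+
+```cpp
+#include <library/cpp/actors/core/actor_bootstrapped.h>
+#include <library/cpp/actors/core/events.h>
+#include <library/cpp/actors/core/hfunc.h>
+
+// On start, schedules a wakeup to itself; on the first wakeup it dies.
+class TSelfPinger: public NActors::TActorBootstrapped<TSelfPinger> {
+public:
+    void Bootstrap() {
+        Become(&TSelfPinger::StateWork); // handler for subsequent messages
+        Schedule(TDuration::Seconds(1), new NActors::TEvents::TEvWakeup); // delayed self-send
+    }
+
+    STFUNC(StateWork) {
+        Y_UNUSED(ctx);
+        switch (ev->GetTypeRewrite()) {
+            cFunc(NActors::TEvents::TSystem::Wakeup, PassAway); // the only correct way to die
+        }
+    }
+};
+```
+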
+### Event and Event Handle.
+The message payload is wrapped in a descendant of IEventBase, which has two important methods: serialization and loading. Serialization is virtual, but loading is not; to parse a message from a byte sequence, the receiving side has to match the numeric event type identifier against the C++ type. That is exactly what the macros in hfunc.h do. In practice, events are created either as descendants of TEventLocal<> (for strictly local messages) or as descendants of TEventPB<> (for messages that can potentially be sent over the network, typed by a protobuf message).
+
+Besides the event itself (as a structure or as a byte string), sending a message requires a set of additional fields:
+
+Recipient
+
+Sender
+
+Message type
+
+Cookie
+
+Flags
+
+Message + additional fields = IEventHandle. Handles are what the actor system actually operates on. <event-type>::TPtr in the example above is precisely a pointer to such a typed handle.
+
+Technically the message type can be any number that the recipient and the sender have agreed to interpret as the message identifier. Established practice is to allocate identifier ranges with the EventSpaceBegin macro (effectively in blocks of 64K), starting from the ES_USERSPACE block, as in the sketch below.
+The cookie is an uninterpreted ui64 number carried along with the handle. It is good practice for a service to set the cookie of its reply to the cookie of the original message, especially for services that may be used concurrently.
+
+In the flags, several bits are reserved for flags declaring how special situations must be handled, and 12 bits hold the number of the interconnect channel through which the message will be sent (for local messages the channel number does not matter in the existing implementations, although one could imagine an implementation with independent queues per channel).
+
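+As a sketch, declaring a strictly local event looks roughly like this (the enum and the names are illustrative, not a library convention):
+
+```cpp
+#include <library/cpp/actors/core/event_local.h>
+#include <library/cpp/actors/core/events.h>
+#include <util/generic/string.h>
+
+// Allocate a 64K identifier block for our events and define one of them.
+enum EEv {
+    EvExample = EventSpaceBegin(NActors::TEvents::ES_USERSPACE),
+    EvExampleResponse,
+    EvEnd
+};
+
+struct TEvExample: NActors::TEventLocal<TEvExample, EvExample> {
+    TString Payload;
+
+    explicit TEvExample(TString payload)
+        : Payload(std::move(payload))
+    {}
+};
+```
+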
+### Thread pools and mailboxes.
+Several independent thread pools can coexist within one actor system; each actor is registered on a specific one and cannot migrate during its lifetime (but it can create new actors on an arbitrary thread pool). This is used for coarse-grained resource partitioning, either between different activities (over here we process one class of requests, over there another) or between different activity profiles (fast requests here, slow ones there, and batch work over yonder). For example, YDB runs a system thread pool (hosting the actors required for YDB itself to function, whose handlers we make sure never block for long), a user thread pool (where requests are processed and handlers may potentially take longer without affecting the infrastructure), a batch thread pool (where long-running work is offloaded – disk compactions, table scans and the like) and, on fat nodes, an interconnect thread pool (as the most latency-sensitive one).
+Message passing between actors of different thread pools within the same local actor system stays local; the message is not forcibly serialized.
+
+At registration an actor is attached to a mailbox (typically its own, but when specifically needed you can, while handling a message, attach a spawned actor to the currently active mailbox – see RegisterWithSameMailbox (formerly RegisterLocal) – in which case the absence of concurrent message processing is guaranteed). Send itself is just wrapping an event into a handle, pushing the handle onto the mailbox queue and adding the mailbox to the thread pool's activation queue. Within one mailbox processing is FIFO; across mailboxes there are no such guarantees, although we do try to activate mailboxes roughly in the order messages arrived in them.
+
+When registering an actor you can choose the mailbox type; they differ slightly in the cost of enqueueing: either cheap but somewhat worse under contention, or almost wait-free but more expensive. See the comments on TMailboxType for up-to-date hints on which is which.
+
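+Both registration flavors look roughly like this from inside a handler (CreateWorker() and the pool id are hypothetical):
+
+```cpp
+void SpawnWorkers(const NActors::TActorContext& ctx) {
+    // Fresh mailbox on an explicitly chosen pool: runs independently of us.
+    ctx.Register(CreateWorker(), NActors::TMailboxType::HTSwap, /* poolId */ 1);
+
+    // Same mailbox as the current actor: never processed concurrently with us.
+    ctx.RegisterWithSameMailbox(CreateWorker());
+}
+```
+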
+Useful helpers.
+
+STFUNC – declares a state function; I recommend always using exactly this form for declarations, as it makes them easier to find later.
+
+hFunc – a handler-selection macro that passes the event into the handler.
+
+cFunc – a handler-selection macro that does not pass the event into the handler.
+
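+Put together, a typical state function looks like this sketch (TEvExample is the illustrative event declared above; Handle is a hypothetical method inside an actor class):
+
+```cpp
+STFUNC(StateWork) {
+    Y_UNUSED(ctx);
+    switch (ev->GetTypeRewrite()) {
+        hFunc(TEvExample, Handle); // the handler receives the typed event
+        cFunc(NActors::TEvents::TSystem::Poison, PassAway); // no event needed
+    }
+}
+
+void Handle(TEvExample::TPtr& ev) {
+    // ev->Get() is the typed event; ev->Sender and ev->Cookie come from the handle.
+}
+```
+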
+### Failure handling.
+Within a local actor system message delivery is guaranteed. If for some reason a message is not delivered (important: precisely not delivered – whether the message was actually processed is on the receiving actor's conscience), one of the following happens:
+
+If the FlagForwardOnNondelivery flag is set, the message is forwarded to the actor passed as forwardOnNondelivery when the handle was constructed. Useful, for example, when some services are created on demand and messages to not-yet-created services should be routed to a router. Works only within the local actor system.
+
+Otherwise, if the FlagTrackDelivery flag is set, a TEvUndelivered event is generated for the sender on behalf of the unreachable actor. Receiving such a message guarantees that the original event was not processed and had no effects. Generation and delivery of the notification is guaranteed within the local actor system; in a distributed one it is down to luck and may be lost.
+
+Otherwise, if no flags are set, the message is dropped.
+
+Since delivery of non-delivery notifications is not guaranteed in a distributed system, reliable failure handling needs an additional mechanism: with the FlagSubscribeOnSession flag, when a message crosses a node boundary, the sender is subscribed to a notification about the termination of the network session within which the message was sent. When the network session breaks, the sender then learns that the message may have gone undelivered (or may have been delivered – we do not know) and can react. Do not forget to unsubscribe from session-termination notifications, otherwise they accumulate until the next disconnect (which may be a long way off).
+
+To summarize: to track delivery within a local actor system, set FlagTrackDelivery and handle TEvUndelivered, as sketched below. For a distributed one, add FlagSubscribeOnSession and additionally handle TEvNodeDisconnected, not forgetting to unsubscribe from subscriptions you no longer need.
+
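+A sketch of the local case (the flag and TEvUndelivered are real library names; serviceId and the cookie are illustrative):
+
+```cpp
+// Ask for a non-delivery notification when sending.
+Send(serviceId, new TEvExample("ping"), NActors::IEventHandle::FlagTrackDelivery, /* cookie */ 42);
+
+// ... and handle the notification in the state function:
+void Handle(NActors::TEvents::TEvUndelivered::TPtr& ev) {
+    // Guaranteed: the event we sent with cookie ev->Cookie was not processed.
+    // React in a protocol-specific way: retry, fail the request, etc.
+}
+```
+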
+### Interconnect.
+A local actor system is only half of the pie; the ability to join several of them into a distributed one is the other half. The interconnect implementation is available out of the box and can:
+Transfer messages over a single tcp connection
+Multiplex different streams (a.k.a. channels) within one connection, guaranteeing ordering within a channel
+It tries to do all of this well.
+Within a distributed system each local actor system must be assigned a unique number (for example via a static table, or by implementing dynamic node number assignment), and a local name service must be started within each local actor system (for example, driven by a table remapping node numbers to network addresses, or acting as a cache of a reference name service).
+Now look at the second example: https://a.yandex-team.ru/arc/trunk/arcadia/library/actors/examples/02_discovery
+Here a distributed actor system is configured across five nodes (in the example all five run in one binary, but they can just as well be started separately). Each node runs a replica for publishing strings and an endpoint actor (each with its own port). Using a publisher actor, the endpoints publish their coordinates to a distributed storage (with handling of abnormal situations and keeping the data up to date). Next to it lies an implementation of a majority-based listing request against that storage. This is essentially the code, simplified and cleaned of specifics, that YDB uses to publish and discover the current endpoints of a user database.
diff --git a/library/cpp/actors/core/README.md b/library/cpp/actors/core/README.md
new file mode 100644
index 0000000000..439a8dd459
--- /dev/null
+++ b/library/cpp/actors/core/README.md
@@ -0,0 +1,99 @@
+## Memory tracker
+
+https://a.yandex-team.ru/arc/trunk/arcadia/library/cpp/actors/core/memory_track.h
+
+Usage:
+
+* tracking allocations of instances of a specific class via new/delete and new[]/delete[]
+* tracking allocations in containers
+* manual tracking of allocation/deallocation points
+
+----
+
+### Tracking class allocations via new/delete
+
+Usage with an automatically generated label:
+
+```cpp
+#include <library/cpp/actors/core/memory_track.h>
+
+struct TTypeLabeled
+ : public NActors::NMemory::TTrack<TTypeLabeled>
+{
+ char payload[16];
+};
+```
+
+Usage with a user-defined label name:
+
+```cpp
+#include <library/cpp/actors/core/memory_track.h>
+
+static const char NamedLabel[] = "NamedLabel";
+
+struct TNameLabeled
+ : public NActors::NMemory::TTrack<TNameLabeled, NamedLabel>
+{
+ char payload[32];
+};
+```
+
+----
+
+### Tracking allocations in containers
+
+```cpp
+#include <library/cpp/actors/core/memory_track.h>
+
+static const char InContainerLabel[] = "InContainerLabel";
+
+struct TInContainer {
+ char payload[16];
+};
+
+std::vector<TInContainer, NActors::NMemory::TAlloc<TInContainer>> vecT;
+
+std::vector<TInContainer, NActors::NMemory::TAlloc<TInContainer, InContainerLabel>> vecN;
+
+using TKey = int;
+
+std::map<TKey, TInContainer, std::less<TKey>,
+ NActors::NMemory::TAlloc<std::pair<const TKey, TInContainer>>> mapT;
+
+std::map<TKey, TInContainer, std::less<TKey>,
+ NActors::NMemory::TAlloc<std::pair<const TKey, TInContainer>, InContainerLabel>> mapN;
+
+std::unordered_map<TKey, TInContainer, std::hash<TKey>, std::equal_to<TKey>,
+ NActors::NMemory::TAlloc<std::pair<const TKey, TInContainer>>> umapT;
+
+std::unordered_map<TKey, TInContainer, std::hash<TKey>, std::equal_to<TKey>,
+ NActors::NMemory::TAlloc<std::pair<const TKey, TInContainer>, InContainerLabel>> umapN;
+```
+
+----
+
+### Manual tracking of allocations/deallocations
+
+```cpp
+#include <library/cpp/actors/core/memory_track.h>
+
+static const char ManualLabel[] = "ManualLabel";
+
+...
+NActors::NMemory::TLabel<ManualLabel>::Add(size);
+
+...
+NActors::NMemory::TLabel<ManualLabel>::Sub(size);
+```
+
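+Manual Add/Sub pairs are easy to leak on early returns; a trivial RAII guard (a sketch, not part of the library) keeps them balanced:
+
+```cpp
+template <const char* Label>
+struct TMemoryLabelGuard {
+    size_t Size;
+
+    explicit TMemoryLabelGuard(size_t size)
+        : Size(size)
+    {
+        NActors::NMemory::TLabel<Label>::Add(Size);
+    }
+
+    ~TMemoryLabelGuard() {
+        NActors::NMemory::TLabel<Label>::Sub(Size);
+    }
+};
+
+// Usage: TMemoryLabelGuard<ManualLabel> guard(size);
+```
+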
+----
+
+### Collected metrics
+
+Service **utils**, user label **label**, sensors:
+
+- MT/Count: current number of allocations
+- MT/Memory: currently allocated memory
+- MT/PeakCount: peak number of allocations (sampled at a fixed rate)
+- MT/PeakMemory: peak amount of allocated memory
+
diff --git a/library/cpp/actors/core/actor.cpp b/library/cpp/actors/core/actor.cpp
new file mode 100644
index 0000000000..6f9ba6a42b
--- /dev/null
+++ b/library/cpp/actors/core/actor.cpp
@@ -0,0 +1,172 @@
+#include "actor.h"
+#include "executor_thread.h"
+#include "mailbox.h"
+#include <library/cpp/actors/util/datetime.h>
+
+namespace NActors {
+ Y_POD_THREAD(TActivationContext*)
+ TlsActivationContext((TActivationContext*)nullptr);
+
+ bool TActorContext::Send(const TActorId& recipient, IEventBase* ev, ui32 flags, ui64 cookie, NWilson::TTraceId traceId) const {
+ return Send(new IEventHandle(recipient, SelfID, ev, flags, cookie, nullptr, std::move(traceId)));
+ }
+
+ bool TActorContext::Send(TAutoPtr<IEventHandle> ev) const {
+ return ExecutorThread.Send(ev);
+ }
+
+ void IActor::Registered(TActorSystem* sys, const TActorId& owner) {
+ // fallback to legacy method, do not use it anymore
+ if (auto eh = AfterRegister(SelfId(), owner))
+ sys->Send(eh);
+ }
+
+ void IActor::Describe(IOutputStream &out) const noexcept {
+ SelfActorId.Out(out);
+ }
+
+ bool IActor::Send(const TActorId& recipient, IEventBase* ev, ui32 flags, ui64 cookie, NWilson::TTraceId traceId) const noexcept {
+ return SelfActorId.Send(recipient, ev, flags, cookie, std::move(traceId));
+ }
+
+ bool TActivationContext::Send(TAutoPtr<IEventHandle> ev) {
+ return TlsActivationContext->ExecutorThread.Send(ev);
+ }
+
+ void TActivationContext::Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie) {
+ TlsActivationContext->ExecutorThread.Schedule(deadline, ev, cookie);
+ }
+
+ void TActivationContext::Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie) {
+ TlsActivationContext->ExecutorThread.Schedule(deadline, ev, cookie);
+ }
+
+ void TActivationContext::Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie) {
+ TlsActivationContext->ExecutorThread.Schedule(delta, ev, cookie);
+ }
+
+ bool TActorIdentity::Send(const TActorId& recipient, IEventBase* ev, ui32 flags, ui64 cookie, NWilson::TTraceId traceId) const {
+ return TActivationContext::Send(new IEventHandle(recipient, *this, ev, flags, cookie, nullptr, std::move(traceId)));
+ }
+
+ void TActorIdentity::Schedule(TInstant deadline, IEventBase* ev, ISchedulerCookie* cookie) const {
+ return TActivationContext::Schedule(deadline, new IEventHandle(*this, {}, ev), cookie);
+ }
+
+ void TActorIdentity::Schedule(TMonotonic deadline, IEventBase* ev, ISchedulerCookie* cookie) const {
+ return TActivationContext::Schedule(deadline, new IEventHandle(*this, {}, ev), cookie);
+ }
+
+ void TActorIdentity::Schedule(TDuration delta, IEventBase* ev, ISchedulerCookie* cookie) const {
+ return TActivationContext::Schedule(delta, new IEventHandle(*this, {}, ev), cookie);
+ }
+
+ TActorId TActivationContext::RegisterWithSameMailbox(IActor* actor, TActorId parentId) {
+ Y_VERIFY_DEBUG(parentId);
+ auto& ctx = *TlsActivationContext;
+ return ctx.ExecutorThread.RegisterActor(actor, &ctx.Mailbox, parentId.Hint(), parentId);
+ }
+
+ TActorId TActorContext::RegisterWithSameMailbox(IActor* actor) const {
+ return ExecutorThread.RegisterActor(actor, &Mailbox, SelfID.Hint(), SelfID);
+ }
+
+ TActorId IActor::RegisterWithSameMailbox(IActor* actor) const noexcept {
+ return TlsActivationContext->ExecutorThread.RegisterActor(actor, &TlsActivationContext->Mailbox, SelfActorId.Hint(), SelfActorId);
+ }
+
+ TActorId TActivationContext::Register(IActor* actor, TActorId parentId, TMailboxType::EType mailboxType, ui32 poolId) {
+ return TlsActivationContext->ExecutorThread.RegisterActor(actor, mailboxType, poolId, parentId);
+ }
+
+ TActorId TActivationContext::InterconnectProxy(ui32 destinationNodeId) {
+ return TlsActivationContext->ExecutorThread.ActorSystem->InterconnectProxy(destinationNodeId);
+ }
+
+ TActorSystem* TActivationContext::ActorSystem() {
+ return TlsActivationContext->ExecutorThread.ActorSystem;
+ }
+
+ i64 TActivationContext::GetCurrentEventTicks() {
+ return GetCycleCountFast() - TlsActivationContext->EventStart;
+ }
+
+ double TActivationContext::GetCurrentEventTicksAsSeconds() {
+ return NHPTimer::GetSeconds(GetCurrentEventTicks());
+ }
+
+ TActorId TActorContext::Register(IActor* actor, TMailboxType::EType mailboxType, ui32 poolId) const {
+ return ExecutorThread.RegisterActor(actor, mailboxType, poolId, SelfID);
+ }
+
+ TActorId IActor::Register(IActor* actor, TMailboxType::EType mailboxType, ui32 poolId) const noexcept {
+ return TlsActivationContext->ExecutorThread.RegisterActor(actor, mailboxType, poolId, SelfActorId);
+ }
+
+ void TActorContext::Schedule(TInstant deadline, IEventBase* ev, ISchedulerCookie* cookie) const {
+ ExecutorThread.Schedule(deadline, new IEventHandle(SelfID, TActorId(), ev), cookie);
+ }
+
+ void TActorContext::Schedule(TMonotonic deadline, IEventBase* ev, ISchedulerCookie* cookie) const {
+ ExecutorThread.Schedule(deadline, new IEventHandle(SelfID, TActorId(), ev), cookie);
+ }
+
+ void TActorContext::Schedule(TDuration delta, IEventBase* ev, ISchedulerCookie* cookie) const {
+ ExecutorThread.Schedule(delta, new IEventHandle(SelfID, TActorId(), ev), cookie);
+ }
+
+ void IActor::Schedule(TInstant deadline, IEventBase* ev, ISchedulerCookie* cookie) const noexcept {
+ TlsActivationContext->ExecutorThread.Schedule(deadline, new IEventHandle(SelfActorId, TActorId(), ev), cookie);
+ }
+
+ void IActor::Schedule(TMonotonic deadline, IEventBase* ev, ISchedulerCookie* cookie) const noexcept {
+ TlsActivationContext->ExecutorThread.Schedule(deadline, new IEventHandle(SelfActorId, TActorId(), ev), cookie);
+ }
+
+ void IActor::Schedule(TDuration delta, IEventBase* ev, ISchedulerCookie* cookie) const noexcept {
+ TlsActivationContext->ExecutorThread.Schedule(delta, new IEventHandle(SelfActorId, TActorId(), ev), cookie);
+ }
+
+ TInstant TActivationContext::Now() {
+ return TlsActivationContext->ExecutorThread.ActorSystem->Timestamp();
+ }
+
+ TMonotonic TActivationContext::Monotonic() {
+ return TlsActivationContext->ExecutorThread.ActorSystem->Monotonic();
+ }
+
+ TInstant TActorContext::Now() const {
+ return ExecutorThread.ActorSystem->Timestamp();
+ }
+
+ TMonotonic TActorContext::Monotonic() const {
+ return ExecutorThread.ActorSystem->Monotonic();
+ }
+
+ NLog::TSettings* TActivationContext::LoggerSettings() const {
+ return ExecutorThread.ActorSystem->LoggerSettings();
+ }
+
+ std::pair<ui32, ui32> TActorContext::CountMailboxEvents(ui32 maxTraverse) const {
+ return Mailbox.CountMailboxEvents(SelfID.LocalId(), maxTraverse);
+ }
+
+ std::pair<ui32, ui32> IActor::CountMailboxEvents(ui32 maxTraverse) const {
+ return TlsActivationContext->Mailbox.CountMailboxEvents(SelfActorId.LocalId(), maxTraverse);
+ }
+
+ void IActor::Die(const TActorContext& ctx) {
+ if (ctx.SelfID)
+ Y_VERIFY(ctx.SelfID == SelfActorId);
+ PassAway();
+ }
+
+ void IActor::PassAway() {
+ auto& cx = *TlsActivationContext;
+ cx.ExecutorThread.UnregisterActor(&cx.Mailbox, SelfActorId.LocalId());
+ }
+
+ double IActor::GetElapsedTicksAsSeconds() const {
+ return NHPTimer::GetSeconds(ElapsedTicks);
+ }
+}
diff --git a/library/cpp/actors/core/actor.h b/library/cpp/actors/core/actor.h
new file mode 100644
index 0000000000..ed29bd14b9
--- /dev/null
+++ b/library/cpp/actors/core/actor.h
@@ -0,0 +1,530 @@
+#pragma once
+
+#include "event.h"
+#include "monotonic.h"
+#include <util/system/tls.h>
+#include <library/cpp/actors/util/local_process_key.h>
+
+namespace NActors {
+ class TActorSystem;
+ class TMailboxTable;
+ struct TMailboxHeader;
+
+ class TExecutorThread;
+ class IActor;
+ class ISchedulerCookie;
+
+ namespace NLog {
+ struct TSettings;
+ }
+
+ struct TActorContext;
+
+ struct TActivationContext {
+ public:
+ TMailboxHeader& Mailbox;
+ TExecutorThread& ExecutorThread;
+ const NHPTimer::STime EventStart;
+
+ protected:
+ explicit TActivationContext(TMailboxHeader& mailbox, TExecutorThread& executorThread, NHPTimer::STime eventStart)
+ : Mailbox(mailbox)
+ , ExecutorThread(executorThread)
+ , EventStart(eventStart)
+ {
+ }
+
+ public:
+ static bool Send(TAutoPtr<IEventHandle> ev);
+
+ /**
+         * Schedule one-shot event that will be sent at given time point in the future.
+         *
+         * @param deadline the wallclock time point in future when event must be sent
+ * @param ev the event to send
+ * @param cookie cookie that will be piggybacked with event
+ */
+ static void Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie = nullptr);
+
+ /**
+         * Schedule one-shot event that will be sent at given time point in the future.
+         *
+         * @param deadline the monotonic time point in future when event must be sent
+ * @param ev the event to send
+ * @param cookie cookie that will be piggybacked with event
+ */
+ static void Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie = nullptr);
+
+ /**
+         * Schedule one-shot event that will be sent after given delay.
+ *
+ * @param delta the time from now to delay event sending
+ * @param ev the event to send
+ * @param cookie cookie that will be piggybacked with event
+ */
+ static void Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie = nullptr);
+
+ static TInstant Now();
+ static TMonotonic Monotonic();
+ NLog::TSettings* LoggerSettings() const;
+
+        // Register a new actor in the ActorSystem on a fresh mailbox.
+ static TActorId Register(IActor* actor, TActorId parentId = TActorId(), TMailboxType::EType mailboxType = TMailboxType::HTSwap, ui32 poolId = Max<ui32>());
+
+        // Register a new actor in the ActorSystem on the same _mailbox_ as the current actor.
+        // There is one thread per mailbox to execute actors, which means
+        // no _cpu core scalability_ for such actors.
+        // This method of registration can be useful if multiple actors share
+ // some memory.
+ static TActorId RegisterWithSameMailbox(IActor* actor, TActorId parentId);
+
+ static const TActorContext& AsActorContext();
+ static TActorContext ActorContextFor(TActorId id);
+
+ static TActorId InterconnectProxy(ui32 nodeid);
+ static TActorSystem* ActorSystem();
+
+ static i64 GetCurrentEventTicks();
+ static double GetCurrentEventTicksAsSeconds();
+ };
+
+ struct TActorContext: public TActivationContext {
+ const TActorId SelfID;
+
+ explicit TActorContext(TMailboxHeader& mailbox, TExecutorThread& executorThread, NHPTimer::STime eventStart, const TActorId& selfID)
+ : TActivationContext(mailbox, executorThread, eventStart)
+ , SelfID(selfID)
+ {
+ }
+
+ bool Send(const TActorId& recipient, IEventBase* ev, ui32 flags = 0, ui64 cookie = 0, NWilson::TTraceId traceId = {}) const;
+ template <typename TEvent>
+ bool Send(const TActorId& recipient, THolder<TEvent> ev, ui32 flags = 0, ui64 cookie = 0, NWilson::TTraceId traceId = {}) const {
+ return Send(recipient, static_cast<IEventBase*>(ev.Release()), flags, cookie, std::move(traceId));
+ }
+ bool Send(TAutoPtr<IEventHandle> ev) const;
+
+ TInstant Now() const;
+ TMonotonic Monotonic() const;
+
+ /**
+         * Schedule one-shot event that will be sent at given time point in the future.
+         *
+         * @param deadline the wallclock time point in future when event must be sent
+ * @param ev the event to send
+ * @param cookie cookie that will be piggybacked with event
+ */
+ void Schedule(TInstant deadline, IEventBase* ev, ISchedulerCookie* cookie = nullptr) const;
+
+ /**
+         * Schedule one-shot event that will be sent at given time point in the future.
+         *
+         * @param deadline the monotonic time point in future when event must be sent
+ * @param ev the event to send
+ * @param cookie cookie that will be piggybacked with event
+ */
+ void Schedule(TMonotonic deadline, IEventBase* ev, ISchedulerCookie* cookie = nullptr) const;
+
+ /**
+         * Schedule one-shot event that will be sent after given delay.
+ *
+ * @param delta the time from now to delay event sending
+ * @param ev the event to send
+ * @param cookie cookie that will be piggybacked with event
+ */
+ void Schedule(TDuration delta, IEventBase* ev, ISchedulerCookie* cookie = nullptr) const;
+
+ TActorContext MakeFor(const TActorId& otherId) const {
+ return TActorContext(Mailbox, ExecutorThread, EventStart, otherId);
+ }
+
+        // Register a new actor in the ActorSystem on a fresh mailbox.
+ TActorId Register(IActor* actor, TMailboxType::EType mailboxType = TMailboxType::HTSwap, ui32 poolId = Max<ui32>()) const;
+
+ // Register new actor in ActorSystem on same _mailbox_ as current actor.
+        // There is one thread per mailbox to execute actors, which means
+        // no _cpu core scalability_ for such actors.
+        // This method of registration can be useful if multiple actors share
+ // some memory.
+ TActorId RegisterWithSameMailbox(IActor* actor) const;
+
+ std::pair<ui32, ui32> CountMailboxEvents(ui32 maxTraverse = Max<ui32>()) const;
+ };
+
+ extern Y_POD_THREAD(TActivationContext*) TlsActivationContext;
+
+ struct TActorIdentity: public TActorId {
+ explicit TActorIdentity(TActorId actorId)
+ : TActorId(actorId)
+ {
+ }
+
+ void operator=(TActorId actorId) {
+ *this = TActorIdentity(actorId);
+ }
+
+ bool Send(const TActorId& recipient, IEventBase* ev, ui32 flags = 0, ui64 cookie = 0, NWilson::TTraceId traceId = {}) const;
+ void Schedule(TInstant deadline, IEventBase* ev, ISchedulerCookie* cookie = nullptr) const;
+ void Schedule(TMonotonic deadline, IEventBase* ev, ISchedulerCookie* cookie = nullptr) const;
+ void Schedule(TDuration delta, IEventBase* ev, ISchedulerCookie* cookie = nullptr) const;
+ };
+
+ class IActor;
+
+ class IActorOps : TNonCopyable {
+ public:
+ virtual void Describe(IOutputStream&) const noexcept = 0;
+ virtual bool Send(const TActorId& recipient, IEventBase*, ui32 flags = 0, ui64 cookie = 0, NWilson::TTraceId traceId = {}) const noexcept = 0;
+
+ /**
+         * Schedule one-shot event that will be sent at given time point in the future.
+         *
+         * @param deadline the wallclock time point in future when event must be sent
+ * @param ev the event to send
+ * @param cookie cookie that will be piggybacked with event
+ */
+ virtual void Schedule(TInstant deadline, IEventBase* ev, ISchedulerCookie* cookie = nullptr) const noexcept = 0;
+
+ /**
+         * Schedule one-shot event that will be sent at given time point in the future.
+         *
+         * @param deadline the monotonic time point in future when event must be sent
+ * @param ev the event to send
+ * @param cookie cookie that will be piggybacked with event
+ */
+ virtual void Schedule(TMonotonic deadline, IEventBase* ev, ISchedulerCookie* cookie = nullptr) const noexcept = 0;
+
+ /**
+         * Schedule one-shot event that will be sent after given delay.
+ *
+ * @param delta the time from now to delay event sending
+ * @param ev the event to send
+ * @param cookie cookie that will be piggybacked with event
+ */
+ virtual void Schedule(TDuration delta, IEventBase* ev, ISchedulerCookie* cookie = nullptr) const noexcept = 0;
+
+ virtual TActorId Register(IActor*, TMailboxType::EType mailboxType = TMailboxType::HTSwap, ui32 poolId = Max<ui32>()) const noexcept = 0;
+ virtual TActorId RegisterWithSameMailbox(IActor*) const noexcept = 0;
+ };
+
+ class TDecorator;
+
+ class IActor : protected IActorOps {
+ public:
+ typedef void (IActor::*TReceiveFunc)(TAutoPtr<IEventHandle>& ev, const TActorContext& ctx);
+
+ private:
+ TReceiveFunc StateFunc;
+ TActorIdentity SelfActorId;
+ i64 ElapsedTicks;
+ ui64 HandledEvents;
+
+ friend void DoActorInit(TActorSystem*, IActor*, const TActorId&, const TActorId&);
+ friend class TDecorator;
+
+ public:
+ /// @sa services.proto NKikimrServices::TActivity::EType
+ enum EActorActivity {
+ OTHER = 0,
+ ACTOR_SYSTEM = 1,
+ ACTORLIB_COMMON = 2,
+ ACTORLIB_STATS = 3,
+ LOG_ACTOR = 4,
+ INTERCONNECT_PROXY_TCP = 12,
+ INTERCONNECT_SESSION_TCP = 13,
+ INTERCONNECT_COMMON = 171,
+ SELF_PING_ACTOR = 207,
+ TEST_ACTOR_RUNTIME = 283,
+ INTERCONNECT_HANDSHAKE = 284,
+ INTERCONNECT_POLLER = 285,
+ INTERCONNECT_SESSION_KILLER = 286,
+ ACTOR_SYSTEM_SCHEDULER_ACTOR = 312,
+ ACTOR_FUTURE_CALLBACK = 337,
+ INTERCONNECT_MONACTOR = 362,
+ INTERCONNECT_LOAD_ACTOR = 376,
+ INTERCONNECT_LOAD_RESPONDER = 377,
+ NAMESERVICE = 450,
+ DNS_RESOLVER = 481,
+ INTERCONNECT_PROXY_WRAPPER = 546,
+ };
+
+ using EActivityType = EActorActivity;
+ ui32 ActivityType;
+
+ protected:
+ IActor(TReceiveFunc stateFunc, ui32 activityType = OTHER)
+ : StateFunc(stateFunc)
+ , SelfActorId(TActorId())
+ , ElapsedTicks(0)
+ , HandledEvents(0)
+ , ActivityType(activityType)
+ {
+ }
+
+ public:
+ virtual ~IActor() {
+ } // must not be called for registered actors, see Die method instead
+
+ protected:
+        virtual void Die(const TActorContext& ctx); // unregisters the actor, so call it exactly once and only from inside message processing
+ virtual void PassAway();
+
+ public:
+ template <typename T>
+ void Become(T stateFunc) {
+ StateFunc = static_cast<TReceiveFunc>(stateFunc);
+ }
+
+ template <typename T, typename... TArgs>
+ void Become(T stateFunc, const TActorContext& ctx, TArgs&&... args) {
+ StateFunc = static_cast<TReceiveFunc>(stateFunc);
+ ctx.Schedule(std::forward<TArgs>(args)...);
+ }
+
+ template <typename T, typename... TArgs>
+ void Become(T stateFunc, TArgs&&... args) {
+ StateFunc = static_cast<TReceiveFunc>(stateFunc);
+ Schedule(std::forward<TArgs>(args)...);
+ }
+
+ protected:
+ void SetActivityType(ui32 activityType) {
+ ActivityType = activityType;
+ }
+
+ public:
+ TReceiveFunc CurrentStateFunc() const {
+ return StateFunc;
+ }
+
+        // NOTE: exceptions must not escape the state function, but if an exception hasn't been caught
+        // by the actor then we want to crash and see the stack
+ void Receive(TAutoPtr<IEventHandle>& ev, const TActorContext& ctx) {
+ (this->*StateFunc)(ev, ctx);
+ HandledEvents++;
+ }
+
+        // must be called to wrap any call transitions from one actor to another
+ template<typename TActor, typename TMethod, typename... TArgs>
+ static decltype((std::declval<TActor>().*std::declval<TMethod>())(std::declval<TArgs>()...))
+ InvokeOtherActor(TActor& actor, TMethod&& method, TArgs&&... args) {
+ struct TRecurseContext : TActorContext {
+ TActivationContext *Prev;
+ TRecurseContext(const TActorId& actorId)
+ : TActorContext(TActivationContext::ActorContextFor(actorId))
+ , Prev(TlsActivationContext)
+ {
+ TlsActivationContext = this;
+ }
+ ~TRecurseContext() {
+ TlsActivationContext = Prev;
+ }
+ } context(actor.SelfId());
+ return (actor.*method)(std::forward<TArgs>(args)...);
+ }
+
+ virtual void Registered(TActorSystem* sys, const TActorId& owner);
+
+ virtual TAutoPtr<IEventHandle> AfterRegister(const TActorId& self, const TActorId& parentId) {
+ Y_UNUSED(self);
+ Y_UNUSED(parentId);
+ return TAutoPtr<IEventHandle>();
+ }
+
+ i64 GetElapsedTicks() const {
+ return ElapsedTicks;
+ }
+ double GetElapsedTicksAsSeconds() const;
+ void AddElapsedTicks(i64 ticks) {
+ ElapsedTicks += ticks;
+ }
+ auto GetActivityType() const {
+ return ActivityType;
+ }
+ ui64 GetHandledEvents() const {
+ return HandledEvents;
+ }
+ TActorIdentity SelfId() const {
+ return SelfActorId;
+ }
+
+ protected:
+ void Describe(IOutputStream&) const noexcept override;
+ bool Send(const TActorId& recipient, IEventBase* ev, ui32 flags = 0, ui64 cookie = 0, NWilson::TTraceId traceId = {}) const noexcept final;
+ template <typename TEvent>
+        bool Send(const TActorId& recipient, THolder<TEvent> ev, ui32 flags = 0, ui64 cookie = 0, NWilson::TTraceId traceId = {}) const {
+ return Send(recipient, static_cast<IEventBase*>(ev.Release()), flags, cookie, std::move(traceId));
+ }
+
+ template <class TEvent, class ... TEventArgs>
+ bool Send(TActorId recipient, TEventArgs&& ... args) const {
+ return Send(recipient, MakeHolder<TEvent>(std::forward<TEventArgs>(args)...));
+ }
+
+ void Schedule(TInstant deadline, IEventBase* ev, ISchedulerCookie* cookie = nullptr) const noexcept final;
+ void Schedule(TMonotonic deadline, IEventBase* ev, ISchedulerCookie* cookie = nullptr) const noexcept final;
+ void Schedule(TDuration delta, IEventBase* ev, ISchedulerCookie* cookie = nullptr) const noexcept final;
+
+        // Register a new actor in the ActorSystem on a fresh mailbox.
+ TActorId Register(IActor* actor, TMailboxType::EType mailboxType = TMailboxType::HTSwap, ui32 poolId = Max<ui32>()) const noexcept final;
+
+ // Register new actor in ActorSystem on same _mailbox_ as current actor.
+        // There is one thread per mailbox to execute actors, which means
+        // no _cpu core scalability_ for such actors.
+        // This method of registration can be useful if multiple actors share
+ // some memory.
+ TActorId RegisterWithSameMailbox(IActor* actor) const noexcept final;
+
+ std::pair<ui32, ui32> CountMailboxEvents(ui32 maxTraverse = Max<ui32>()) const;
+
+ private:
+ void ChangeSelfId(TActorId actorId) {
+ SelfActorId = actorId;
+ }
+ };
+
+ struct TActorActivityTag {};
+
+ inline size_t GetActivityTypeCount() {
+ return TLocalProcessKeyState<TActorActivityTag>::GetInstance().GetCount();
+ }
+
+ inline TStringBuf GetActivityTypeName(size_t index) {
+ return TLocalProcessKeyState<TActorActivityTag>::GetInstance().GetNameByIndex(index);
+ }
+
+ template <typename TDerived>
+ class TActor: public IActor {
+ private:
+ template <typename T, typename = const char*>
+ struct HasActorName: std::false_type { };
+ template <typename T>
+ struct HasActorName<T, decltype((void)T::ActorName, (const char*)nullptr)>: std::true_type { };
+
+ static ui32 GetActivityTypeIndex() {
+ if constexpr(HasActorName<TDerived>::value) {
+ return TLocalProcessKey<TActorActivityTag, TDerived::ActorName>::GetIndex();
+ } else {
+ using TActorActivity = decltype(((TDerived*)nullptr)->ActorActivityType());
+ // if constexpr(std::is_enum<TActorActivity>::value) {
+ return TEnumProcessKey<TActorActivityTag, TActorActivity>::GetIndex(
+ TDerived::ActorActivityType());
+ //} else {
+ // for int, ui32, ...
+ // return TEnumProcessKey<TActorActivityTag, IActor::EActorActivity>::GetIndex(
+ // static_cast<IActor::EActorActivity>(TDerived::ActorActivityType()));
+ //}
+ }
+ }
+
+ protected:
+ //* Comment this function to find unmarked activities
+ static constexpr IActor::EActivityType ActorActivityType() {
+ return EActorActivity::OTHER;
+ } //*/
+
+ // static constexpr char ActorName[] = "UNNAMED";
+
+ TActor(void (TDerived::*func)(TAutoPtr<IEventHandle>& ev, const TActorContext& ctx), ui32 activityType = GetActivityTypeIndex())
+ : IActor(static_cast<TReceiveFunc>(func), activityType)
+ { }
+
+ public:
+ typedef TDerived TThis;
+ };
+
+
+#define STFUNC_SIG TAutoPtr< ::NActors::IEventHandle>&ev, const ::NActors::TActorContext &ctx
+#define STATEFN_SIG TAutoPtr<::NActors::IEventHandle>& ev
+#define STFUNC(funcName) void funcName(TAutoPtr< ::NActors::IEventHandle>& ev, const ::NActors::TActorContext& ctx)
+#define STATEFN(funcName) void funcName(TAutoPtr< ::NActors::IEventHandle>& ev, const ::NActors::TActorContext& )
+
+#define STRICT_STFUNC(NAME, HANDLERS) \
+ void NAME(STFUNC_SIG) { \
+ Y_UNUSED(ctx); \
+ switch (const ui32 etype = ev->GetTypeRewrite()) { \
+ HANDLERS \
+ default: \
+ Y_VERIFY_DEBUG(false, "%s: unexpected message type 0x%08" PRIx32, __func__, etype); \
+ } \
+ }
+
+ inline const TActorContext& TActivationContext::AsActorContext() {
+ TActivationContext* tls = TlsActivationContext;
+ return *static_cast<TActorContext*>(tls);
+ }
+
+ inline TActorContext TActivationContext::ActorContextFor(TActorId id) {
+ auto& tls = *TlsActivationContext;
+ return TActorContext(tls.Mailbox, tls.ExecutorThread, tls.EventStart, id);
+ }
+
+ class TDecorator : public IActor {
+ protected:
+ THolder<IActor> Actor;
+
+ public:
+ TDecorator(THolder<IActor>&& actor)
+ : IActor(static_cast<TReceiveFunc>(&TDecorator::State), actor->GetActivityType())
+ , Actor(std::move(actor))
+ {
+ }
+
+ void Registered(TActorSystem* sys, const TActorId& owner) override {
+ Actor->ChangeSelfId(SelfId());
+ Actor->Registered(sys, owner);
+ }
+
+ virtual bool DoBeforeReceiving(TAutoPtr<IEventHandle>& /*ev*/, const TActorContext& /*ctx*/) {
+ return true;
+ }
+
+ virtual void DoAfterReceiving(const TActorContext& /*ctx*/)
+ {
+ }
+
+ STFUNC(State) {
+ if (DoBeforeReceiving(ev, ctx)) {
+ Actor->Receive(ev, ctx);
+ DoAfterReceiving(ctx);
+ }
+ }
+ };
+
+ // TTestDecorator doesn't work with the real actor system
+ struct TTestDecorator : public TDecorator {
+ TTestDecorator(THolder<IActor>&& actor)
+ : TDecorator(std::move(actor))
+ {
+ }
+
+ virtual ~TTestDecorator() = default;
+
+ // This method must be called in the test actor system
+ bool BeforeSending(TAutoPtr<IEventHandle>& ev)
+ {
+ bool send = true;
+ TTestDecorator *decorator = dynamic_cast<TTestDecorator*>(Actor.Get());
+ if (decorator) {
+ send = decorator->BeforeSending(ev);
+ }
+ return send && ev && DoBeforeSending(ev);
+ }
+
+ virtual bool DoBeforeSending(TAutoPtr<IEventHandle>& /*ev*/) {
+ return true;
+ }
+ };
+}
+
+template <>
+inline void Out<NActors::TActorIdentity>(IOutputStream& o, const NActors::TActorIdentity& x) {
+ return x.Out(o);
+}
+
+template <>
+struct THash<NActors::TActorIdentity> {
+ inline ui64 operator()(const NActors::TActorIdentity& x) const {
+ return x.Hash();
+ }
+};
diff --git a/library/cpp/actors/core/actor_bootstrapped.h b/library/cpp/actors/core/actor_bootstrapped.h
new file mode 100644
index 0000000000..a37887c939
--- /dev/null
+++ b/library/cpp/actors/core/actor_bootstrapped.h
@@ -0,0 +1,37 @@
+#pragma once
+
+#include "actor.h"
+#include "events.h"
+
+namespace NActors {
+ template<typename T> struct dependent_false : std::false_type {};
+
+ template<typename TDerived>
+ class TActorBootstrapped : public TActor<TDerived> {
+ protected:
+ TAutoPtr<IEventHandle> AfterRegister(const TActorId& self, const TActorId& parentId) override {
+ return new IEventHandle(TEvents::TSystem::Bootstrap, 0, self, parentId, {}, 0);
+ }
+
+ STFUNC(StateBootstrap) {
+ Y_VERIFY(ev->GetTypeRewrite() == TEvents::TSystem::Bootstrap, "Unexpected bootstrap message");
+ using T = decltype(&TDerived::Bootstrap);
+ TDerived& self = static_cast<TDerived&>(*this);
+ if constexpr (std::is_invocable_v<T, TDerived, const TActorContext&>) {
+ self.Bootstrap(ctx);
+ } else if constexpr (std::is_invocable_v<T, TDerived, const TActorId&, const TActorContext&>) {
+ self.Bootstrap(ev->Sender, ctx);
+ } else if constexpr (std::is_invocable_v<T, TDerived>) {
+ self.Bootstrap();
+ } else if constexpr (std::is_invocable_v<T, TDerived, const TActorId&>) {
+ self.Bootstrap(ev->Sender);
+ } else {
+ static_assert(dependent_false<TDerived>::value, "No correct Bootstrap() signature");
+ }
+ }
+
+ TActorBootstrapped()
+ : TActor<TDerived>(&TDerived::StateBootstrap)
+ {}
+ };
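+
+ // Example (a sketch; TMyActor and StateWork are hypothetical). Any of the four
+ // Bootstrap() signatures dispatched above is detected automatically:
+ //
+ // class TMyActor: public TActorBootstrapped<TMyActor> {
+ // public:
+ // void Bootstrap() {
+ // Become(&TMyActor::StateWork);
+ // }
+ // STATEFN(StateWork) { Y_UNUSED(ev); }
+ // };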
+}
diff --git a/library/cpp/actors/core/actor_coroutine.cpp b/library/cpp/actors/core/actor_coroutine.cpp
new file mode 100644
index 0000000000..0ab4d2b24d
--- /dev/null
+++ b/library/cpp/actors/core/actor_coroutine.cpp
@@ -0,0 +1,165 @@
+#include "actor_coroutine.h"
+#include "executor_thread.h"
+
+#include <util/system/sanitizers.h>
+#include <util/system/type_name.h>
+
+namespace NActors {
+ static constexpr size_t StackOverflowGap = 4096;
+ static char GoodStack[StackOverflowGap];
+
+ static struct TInitGoodStack {
+ TInitGoodStack() {
+ // fill stack with some pseudo-random pattern
+ for (size_t k = 0; k < StackOverflowGap; ++k) {
+ GoodStack[k] = k + k * 91;
+ }
+ }
+ } initGoodStack;
+
+ TActorCoroImpl::TActorCoroImpl(size_t stackSize, bool allowUnhandledPoisonPill, bool allowUnhandledDtor)
+ : Stack(stackSize)
+ , AllowUnhandledPoisonPill(allowUnhandledPoisonPill)
+ , AllowUnhandledDtor(allowUnhandledDtor)
+ , FiberClosure{this, TArrayRef(Stack.Begin(), Stack.End())}
+ , FiberContext(FiberClosure)
+ {
+#ifndef NDEBUG
+ char* p;
+#if STACK_GROW_DOWN
+ p = Stack.Begin();
+#else
+ p = Stack.End() - StackOverflowGap;
+#endif
+ memcpy(p, GoodStack, StackOverflowGap);
+#endif
+ }
+
+ TActorCoroImpl::~TActorCoroImpl() {
+ if (!Finished && !NSan::TSanIsOn()) { // only resume if Run() was entered after bootstrap and has not finished yet; otherwise simply terminate
+ Y_VERIFY(!PendingEvent);
+ Resume();
+ }
+ }
+
+ bool TActorCoroImpl::Send(TAutoPtr<IEventHandle> ev) {
+ return GetActorContext().ExecutorThread.Send(ev);
+ }
+
+ THolder<IEventHandle> TActorCoroImpl::WaitForEvent(TInstant deadline) {
+ const ui64 cookie = ++WaitCookie;
+ if (deadline != TInstant::Max()) {
+ ActorContext->ExecutorThread.Schedule(deadline - Now(), new IEventHandle(SelfActorId, {}, new TEvCoroTimeout,
+ 0, cookie));
+ }
+
+ // ensure we have no unprocessed event and return to the actor system to receive one
+ Y_VERIFY(!PendingEvent);
+ ReturnToActorSystem();
+
+ // obtain pending event and ensure we've got one
+ while (THolder<IEventHandle> event = std::exchange(PendingEvent, {})) {
+ if (event->GetTypeRewrite() != TEvents::TSystem::CoroTimeout) {
+ // special handling for poison pill -- we throw exception
+ if (event->GetTypeRewrite() == TEvents::TEvPoisonPill::EventType) {
+ throw TPoisonPillException();
+ }
+
+ // otherwise just return received event
+ return event;
+ } else if (event->Cookie == cookie) {
+ return nullptr; // not a race -- this timeout was scheduled exactly for our current wait
+ } else {
+ ReturnToActorSystem(); // drop this event and wait for the next one
+ }
+ }
+ Y_FAIL("no pending event");
+ }
+
+ const TActorContext& TActorCoroImpl::GetActorContext() const {
+ Y_VERIFY(ActorContext);
+ return *ActorContext;
+ }
+
+ bool TActorCoroImpl::ProcessEvent(THolder<IEventHandle> ev) {
+ Y_VERIFY(!PendingEvent);
+ if (!SelfActorId) { // process bootstrap message, extract actor ids
+ Y_VERIFY(ev->GetTypeRewrite() == TEvents::TSystem::Bootstrap);
+ SelfActorId = ev->Recipient;
+ ParentActorId = ev->Sender;
+ } else { // process further messages
+ PendingEvent = std::move(ev);
+ }
+
+ // prepare actor context for in-coroutine use
+ TActivationContext *ac = TlsActivationContext;
+ TlsActivationContext = nullptr;
+ TActorContext ctx(ac->Mailbox, ac->ExecutorThread, ac->EventStart, SelfActorId);
+ ActorContext = &ctx;
+
+ Resume();
+
+ // drop actor context
+ TlsActivationContext = ac;
+ ActorContext = nullptr;
+
+ return Finished;
+ }
+
+ void TActorCoroImpl::Resume() {
+ // save caller context for a later return
+ Y_VERIFY(!ActorSystemContext);
+ TExceptionSafeContext actorSystemContext;
+ ActorSystemContext = &actorSystemContext;
+
+ // go to actor coroutine
+ BeforeResume();
+ ActorSystemContext->SwitchTo(&FiberContext);
+
+ // check for stack overflow
+#ifndef NDEBUG
+ const char* p;
+#if STACK_GROW_DOWN
+ p = Stack.Begin();
+#else
+ p = Stack.End() - StackOverflowGap;
+#endif
+ Y_VERIFY_DEBUG(memcmp(p, GoodStack, StackOverflowGap) == 0);
+#endif
+ }
+
+ void TActorCoroImpl::DoRun() {
+ try {
+ if (ActorContext) { // ActorContext may be nullptr here if the destructor was invoked before bootstrapping
+ Y_VERIFY(!PendingEvent);
+ Run();
+ }
+ } catch (const TPoisonPillException& /*ex*/) {
+ if (!AllowUnhandledPoisonPill) {
+ Y_FAIL("unhandled TPoisonPillException");
+ }
+ } catch (const TDtorException& /*ex*/) {
+ if (!AllowUnhandledDtor) {
+ Y_FAIL("unhandled TDtorException");
+ }
+ } catch (const std::exception& ex) {
+ Y_FAIL("unhandled exception of type %s", TypeName(ex).data());
+ } catch (...) {
+ Y_FAIL("unhandled exception of type not derived from std::exception");
+ }
+ Finished = true;
+ ReturnToActorSystem();
+ }
+
+ void TActorCoroImpl::ReturnToActorSystem() {
+ TExceptionSafeContext* returnContext = std::exchange(ActorSystemContext, nullptr);
+ Y_VERIFY(returnContext);
+ FiberContext.SwitchTo(returnContext);
+ if (!PendingEvent) {
+ // the actor system has resumed us without a pending event: it is being
+ // stopped and asks the coroutine to terminate
+ throw TDtorException();
+ }
+ }
+
+}
diff --git a/library/cpp/actors/core/actor_coroutine.h b/library/cpp/actors/core/actor_coroutine.h
new file mode 100644
index 0000000000..6bcb768eaf
--- /dev/null
+++ b/library/cpp/actors/core/actor_coroutine.h
@@ -0,0 +1,174 @@
+#pragma once
+
+#include <util/system/context.h>
+#include <util/system/filemap.h>
+
+#include "actor_bootstrapped.h"
+#include "executor_thread.h"
+#include "event_local.h"
+
+namespace NActors {
+
+ class TActorCoro;
+
+ class TActorCoroImpl : public ITrampoLine {
+ TMappedAllocation Stack;
+ bool AllowUnhandledPoisonPill;
+ bool AllowUnhandledDtor;
+ TContClosure FiberClosure;
+ TExceptionSafeContext FiberContext;
+ TExceptionSafeContext* ActorSystemContext = nullptr;
+ THolder<IEventHandle> PendingEvent;
+ bool Finished = false;
+ ui64 WaitCookie = 0;
+ TActorContext *ActorContext = nullptr;
+
+ protected:
+ TActorIdentity SelfActorId = TActorIdentity(TActorId());
+ TActorId ParentActorId;
+
+ private:
+ template <typename TFirstEvent, typename... TOtherEvents>
+ struct TIsOneOf: public TIsOneOf<TOtherEvents...> {
+ bool operator()(IEventHandle& ev) const {
+ return ev.GetTypeRewrite() == TFirstEvent::EventType || TIsOneOf<TOtherEvents...>()(ev);
+ }
+ };
+
+ template <typename TSingleEvent>
+ struct TIsOneOf<TSingleEvent> {
+ bool operator()(IEventHandle& ev) const {
+ return ev.GetTypeRewrite() == TSingleEvent::EventType;
+ }
+ };
+
+ struct TEvCoroTimeout : TEventLocal<TEvCoroTimeout, TEvents::TSystem::CoroTimeout> {};
+
+ protected:
+ struct TPoisonPillException : yexception {};
+ struct TDtorException : yexception {};
+
+ public:
+ // Specify stackSize explicitly for each actor; don't forget about the overflow-control gap.
+ TActorCoroImpl(size_t stackSize, bool allowUnhandledPoisonPill = false, bool allowUnhandledDtor = false);
+
+ virtual ~TActorCoroImpl();
+
+ virtual void Run() = 0;
+
+ virtual void BeforeResume() {}
+
+ // Handle all events that are not expected in wait loops.
+ virtual void ProcessUnexpectedEvent(TAutoPtr<IEventHandle> ev) = 0;
+
+ // Release execution ownership and wait for some event to arrive. When a PoisonPill event is received,
+ // TPoisonPillException is thrown.
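+ //
+ // Example (a sketch): waiting with a deadline; the call returns nullptr if the timeout fires first:
+ // if (THolder<IEventHandle> ev = WaitForEvent(Now() + TDuration::Seconds(5))) { /* handle *ev */ }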
+ THolder<IEventHandle> WaitForEvent(TInstant deadline = TInstant::Max());
+
+ // Wait for a specific event selected by the filter functor. The function returns the first event that
+ // matches the filter; ProcessUnexpectedEvent() is called for any other kind of event.
+ //
+ // Example: WaitForSpecificEvent([](IEventHandle& ev) { return ev.Cookie == 42; });
+ template <typename TFunc>
+ THolder<IEventHandle> WaitForSpecificEvent(TFunc&& filter, TInstant deadline = TInstant::Max()) {
+ for (;;) {
+ if (THolder<IEventHandle> event = WaitForEvent(deadline); !event) {
+ return nullptr;
+ } else if (filter(*event)) {
+ return event;
+ } else {
+ ProcessUnexpectedEvent(event);
+ }
+ }
+ }
+
+ // Wait for a specific event from a set of types. The function returns the first event that matches one of
+ // the listed types; ProcessUnexpectedEvent() is called for any other kind of event.
+ //
+ // Example: WaitForSpecificEvent<TEvReadResult, TEvFinished>();
+ template <typename TFirstEvent, typename TSecondEvent, typename... TOtherEvents>
+ THolder<IEventHandle> WaitForSpecificEvent(TInstant deadline = TInstant::Max()) {
+ TIsOneOf<TFirstEvent, TSecondEvent, TOtherEvents...> filter;
+ return WaitForSpecificEvent(filter, deadline);
+ }
+
+ // Wait for a single specific event.
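+ //
+ // Example: THolder<TEvUserEvent::THandle> ev = WaitForSpecificEvent<TEvUserEvent>();
+ // (TEvUserEvent is a hypothetical user-defined event type.)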
+ template <typename TEventType>
+ THolder<typename TEventType::THandle> WaitForSpecificEvent(TInstant deadline = TInstant::Max()) {
+ auto filter = [](IEventHandle& ev) {
+ return ev.GetTypeRewrite() == TEventType::EventType;
+ };
+ THolder<IEventHandle> event = WaitForSpecificEvent(filter, deadline);
+ return THolder<typename TEventType::THandle>(static_cast<typename TEventType::THandle*>(event ? event.Release() : nullptr));
+ }
+
+ protected: // Actor System compatibility section
+ const TActorContext& GetActorContext() const;
+ TActorSystem *GetActorSystem() const { return GetActorContext().ExecutorThread.ActorSystem; }
+ TInstant Now() const { return GetActorContext().Now(); }
+
+ bool Send(const TActorId& recipient, IEventBase* ev, ui32 flags = 0, ui64 cookie = 0, NWilson::TTraceId traceId = {}) {
+ return GetActorContext().Send(recipient, ev, flags, cookie, std::move(traceId));
+ }
+
+ template <typename TEvent>
+ bool Send(const TActorId& recipient, THolder<TEvent> ev, ui32 flags = 0, ui64 cookie = 0, NWilson::TTraceId traceId = {}) {
+ return GetActorContext().Send(recipient, ev.Release(), flags, cookie, std::move(traceId));
+ }
+
+ bool Send(TAutoPtr<IEventHandle> ev);
+
+ void Schedule(TDuration delta, IEventBase* ev, ISchedulerCookie* cookie = nullptr) {
+ return GetActorContext().Schedule(delta, ev, cookie);
+ }
+
+ void Schedule(TInstant deadline, IEventBase* ev, ISchedulerCookie* cookie = nullptr) {
+ return GetActorContext().Schedule(deadline, ev, cookie);
+ }
+
+ void Schedule(TMonotonic deadline, IEventBase* ev, ISchedulerCookie* cookie = nullptr) {
+ return GetActorContext().Schedule(deadline, ev, cookie);
+ }
+
+ TActorId Register(IActor* actor, TMailboxType::EType mailboxType = TMailboxType::HTSwap, ui32 poolId = Max<ui32>()) {
+ return GetActorContext().Register(actor, mailboxType, poolId);
+ }
+
+ TActorId RegisterWithSameMailbox(IActor* actor) {
+ return GetActorContext().RegisterWithSameMailbox(actor);
+ }
+
+ private:
+ friend class TActorCoro;
+ bool ProcessEvent(THolder<IEventHandle> ev);
+
+ private:
+ /* Resume() switches into the actor coroutine context and continues (or starts) executing it until the
+ * actor finishes its job or blocks in WaitForEvent(); then the function returns. */
+ void Resume();
+ void ReturnToActorSystem();
+ void DoRun() override final;
+ };
+
+ class TActorCoro : public IActor {
+ THolder<TActorCoroImpl> Impl;
+
+ public:
+ TActorCoro(THolder<TActorCoroImpl> impl, ui32 activityType = IActor::ACTORLIB_COMMON)
+ : IActor(static_cast<TReceiveFunc>(&TActorCoro::StateFunc), activityType)
+ , Impl(std::move(impl))
+ {}
+
+ TAutoPtr<IEventHandle> AfterRegister(const TActorId& self, const TActorId& parent) override {
+ return new IEventHandle(TEvents::TSystem::Bootstrap, 0, self, parent, {}, 0);
+ }
+
+ private:
+ STATEFN(StateFunc) {
+ if (Impl->ProcessEvent(ev)) {
+ PassAway();
+ }
+ }
+ };
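+
+ // Example (a sketch; TMyCoro is a hypothetical TActorCoroImpl subclass that
+ // overrides Run() and ProcessUnexpectedEvent()):
+ //
+ // TActorId id = actorSystem.Register(new TActorCoro(MakeHolder<TMyCoro>()));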
+
+}
diff --git a/library/cpp/actors/core/actor_coroutine_ut.cpp b/library/cpp/actors/core/actor_coroutine_ut.cpp
new file mode 100644
index 0000000000..951512b877
--- /dev/null
+++ b/library/cpp/actors/core/actor_coroutine_ut.cpp
@@ -0,0 +1,141 @@
+#include "actor_coroutine.h"
+#include "actorsystem.h"
+#include "executor_pool_basic.h"
+#include "scheduler_basic.h"
+#include "events.h"
+#include "event_local.h"
+#include "hfunc.h"
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <util/system/sanitizers.h>
+
+using namespace NActors;
+
+Y_UNIT_TEST_SUITE(ActorCoro) {
+ enum {
+ Begin = EventSpaceBegin(TEvents::ES_USERSPACE),
+ Request,
+ Response,
+ Enough
+ };
+
+ struct TEvRequest: public TEventLocal<TEvRequest, Request> {
+ };
+
+ struct TEvResponse: public TEventLocal<TEvResponse, Response> {
+ };
+
+ struct TEvEnough: public TEventLocal<TEvEnough, Enough> {
+ };
+
+ class TBasicResponderActor: public TActorBootstrapped<TBasicResponderActor> {
+ TDeque<TActorId> RespondTo;
+
+ public:
+ TBasicResponderActor() {
+ }
+
+ void Bootstrap(const TActorContext& /*ctx*/) {
+ Become(&TBasicResponderActor::StateFunc);
+ }
+
+ STFUNC(StateFunc) {
+ switch (ev->GetTypeRewrite()) {
+ HFunc(TEvRequest, Handle);
+ HFunc(TEvents::TEvWakeup, Handle);
+ HFunc(TEvents::TEvPoisonPill, Handle);
+ }
+ }
+
+ void Handle(TEvRequest::TPtr& ev, const TActorContext& ctx) {
+ RespondTo.push_back(ev->Sender);
+ ctx.Schedule(TDuration::Seconds(1), new TEvents::TEvWakeup);
+ }
+
+ void Handle(TEvents::TEvWakeup::TPtr& /*ev*/, const TActorContext& ctx) {
+ ctx.Send(RespondTo.front(), new TEvResponse());
+ RespondTo.pop_front();
+ }
+
+ void Handle(TEvents::TEvPoisonPill::TPtr& /*ev*/, const TActorContext& ctx) {
+ Die(ctx);
+ }
+ };
+
+ class TCoroActor: public TActorCoroImpl {
+ TManualEvent& DoneEvent;
+ TAtomic& ItemsProcessed;
+ bool Finish;
+
+ public:
+ TCoroActor(TManualEvent& doneEvent, TAtomic& itemsProcessed)
+ : TActorCoroImpl(1 << 20)
+ , DoneEvent(doneEvent)
+ , ItemsProcessed(itemsProcessed)
+ , Finish(false)
+ {
+ }
+
+ void Run() override {
+ TActorId child = GetActorContext().Register(new TBasicResponderActor);
+ ui32 itemsProcessed = 0;
+ try {
+ while (!Finish) {
+ GetActorContext().Send(child, new TEvRequest());
+ THolder<IEventHandle> resp = WaitForSpecificEvent<TEvResponse>();
+ UNIT_ASSERT_EQUAL(resp->GetTypeRewrite(), TEvResponse::EventType);
+ ++itemsProcessed;
+ }
+ } catch (const TPoisonPillException& /*ex*/) {
+ }
+ GetActorContext().Send(child, new TEvents::TEvPoisonPill);
+
+ AtomicSet(ItemsProcessed, itemsProcessed);
+ DoneEvent.Signal();
+ }
+
+ void ProcessUnexpectedEvent(TAutoPtr<IEventHandle> event) override {
+ if (event->GetTypeRewrite() == Enough) {
+ Finish = true;
+ }
+ }
+ };
+
+ void Check(THolder<IEventBase> && message) {
+ THolder<TActorSystemSetup> setup = MakeHolder<TActorSystemSetup>();
+ setup->NodeId = 0;
+ setup->ExecutorsCount = 1;
+ setup->Executors.Reset(new TAutoPtr<IExecutorPool>[setup->ExecutorsCount]);
+ for (ui32 i = 0; i < setup->ExecutorsCount; ++i) {
+ setup->Executors[i] = new TBasicExecutorPool(i, 5, 10, "basic");
+ }
+ setup->Scheduler = new TBasicSchedulerThread;
+
+ TActorSystem actorSystem(setup);
+
+ actorSystem.Start();
+
+ TManualEvent doneEvent;
+ TAtomic itemsProcessed = 0;
+ TActorId actor = actorSystem.Register(new TActorCoro(MakeHolder<TCoroActor>(doneEvent, itemsProcessed)));
+ NanoSleep(3UL * 1000 * 1000 * 1000);
+ actorSystem.Send(actor, message.Release());
+ doneEvent.WaitI();
+
+ UNIT_ASSERT(AtomicGet(itemsProcessed) >= 2);
+
+ actorSystem.Stop();
+ }
+
+ Y_UNIT_TEST(Basic) {
+ if (NSan::TSanIsOn()) {
+ // TODO https://st.yandex-team.ru/DEVTOOLS-3154
+ return;
+ }
+ Check(MakeHolder<TEvEnough>());
+ }
+
+ Y_UNIT_TEST(PoisonPill) {
+ Check(MakeHolder<TEvents::TEvPoisonPill>());
+ }
+}
diff --git a/library/cpp/actors/core/actor_ut.cpp b/library/cpp/actors/core/actor_ut.cpp
new file mode 100644
index 0000000000..e1b765ec72
--- /dev/null
+++ b/library/cpp/actors/core/actor_ut.cpp
@@ -0,0 +1,578 @@
+#include "actor.cpp"
+#include "events.h"
+#include "actorsystem.h"
+#include "executor_pool_basic.h"
+#include "scheduler_basic.h"
+#include "actor_bootstrapped.h"
+
+#include <library/cpp/actors/util/threadparkpad.h>
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <util/generic/algorithm.h>
+#include <util/system/atomic.h>
+#include <util/system/rwlock.h>
+#include <util/system/hp_timer.h>
+
+using namespace NActors;
+
+struct TTestEndDecorator : TDecorator {
+ TThreadParkPad* Pad;
+ TAtomic* ActorsAlive;
+
+ TTestEndDecorator(THolder<IActor>&& actor, TThreadParkPad* pad, TAtomic* actorsAlive)
+ : TDecorator(std::move(actor))
+ , Pad(pad)
+ , ActorsAlive(actorsAlive)
+ {
+ AtomicIncrement(*ActorsAlive);
+ }
+
+ ~TTestEndDecorator() {
+ if (AtomicDecrement(*ActorsAlive) == 0) {
+ Pad->Unpark();
+ }
+ }
+};
+
+Y_UNIT_TEST_SUITE(ActorBenchmark) {
+ static constexpr bool DefaultNoRealtime = true;
+ static constexpr ui32 DefaultSpinThreshold = 1000000;
+ static constexpr ui32 TotalEventsAmount = 1000;
+
+ class TDummyActor : public TActor<TDummyActor> {
+ public:
+ TDummyActor() : TActor<TDummyActor>(&TDummyActor::StateFunc) {}
+ STFUNC(StateFunc) {
+ (void)ev;
+ (void)ctx;
+ }
+ };
+
+ enum ERole {
+ Leader,
+ Follower
+ };
+
+ class TSendReceiveActor : public TActorBootstrapped<TSendReceiveActor> {
+ public:
+ static constexpr auto ActorActivityType() {
+ return ACTORLIB_COMMON;
+ }
+
+ TSendReceiveActor(double* elapsedTime, TActorId receiver, bool allocation, ERole role, ui32 neighbours = 0)
+ : EventsCounter(TotalEventsAmount)
+ , ElapsedTime(elapsedTime)
+ , Receiver(receiver)
+ , AllocatesMemory(allocation)
+ , Role(role)
+ , MailboxNeighboursCount(neighbours)
+ {}
+
+ void Bootstrap(const TActorContext &ctx) {
+ if (!Receiver) {
+ this->Receiver = SelfId();
+ } else {
+ EventsCounter /= 2; // We want to measure CPU requirement for one-way send
+ }
+ Timer.Reset();
+ Become(&TThis::StateFunc);
+ for (ui32 i = 0; i < MailboxNeighboursCount; ++i) {
+ ctx.RegisterWithSameMailbox(new TDummyActor());
+ }
+ if (Role == Leader) {
+ Send(Receiver, new TEvents::TEvPing());
+ }
+ }
+
+ STATEFN(StateFunc) {
+ if (EventsCounter == 0 && ElapsedTime != nullptr) {
+ *ElapsedTime = Timer.Passed() / TotalEventsAmount;
+ PassAway();
+ }
+
+ if (AllocatesMemory) {
+ Send(ev->Sender, new TEvents::TEvPing());
+ } else {
+ std::swap(*const_cast<TActorId*>(&ev->Sender), *const_cast<TActorId*>(&ev->Recipient));
+ ev->DropRewrite();
+ TActivationContext::Send(ev.Release());
+ }
+ EventsCounter--;
+ }
+
+ private:
+ THPTimer Timer;
+ ui64 EventsCounter;
+ double* ElapsedTime;
+ TActorId Receiver;
+ bool AllocatesMemory;
+ ERole Role;
+ ui32 MailboxNeighboursCount;
+ };
+
+ void AddBasicPool(THolder<TActorSystemSetup>& setup, ui32 threads, bool activateEveryEvent) {
+ TBasicExecutorPoolConfig basic;
+ basic.PoolId = setup->GetExecutorsCount();
+ basic.PoolName = TStringBuilder() << "b" << basic.PoolId;
+ basic.Threads = threads;
+ basic.SpinThreshold = DefaultSpinThreshold;
+ basic.TimePerMailbox = TDuration::Hours(1);
+ if (activateEveryEvent) {
+ basic.EventsPerMailbox = 1;
+ } else {
+ basic.EventsPerMailbox = Max<ui32>();
+ }
+ setup->CpuManager.Basic.emplace_back(std::move(basic));
+ }
+
+ void AddUnitedPool(THolder<TActorSystemSetup>& setup, ui32 concurrency, bool activateEveryEvent) {
+ TUnitedExecutorPoolConfig united;
+ united.PoolId = setup->GetExecutorsCount();
+ united.PoolName = TStringBuilder() << "u" << united.PoolId;
+ united.Concurrency = concurrency;
+ united.TimePerMailbox = TDuration::Hours(1);
+ if (activateEveryEvent) {
+ united.EventsPerMailbox = 1;
+ } else {
+ united.EventsPerMailbox = Max<ui32>();
+ }
+ setup->CpuManager.United.emplace_back(std::move(united));
+ }
+
+ THolder<TActorSystemSetup> GetActorSystemSetup(ui32 unitedCpuCount, bool preemption) {
+ auto setup = MakeHolder<NActors::TActorSystemSetup>();
+ setup->NodeId = 1;
+ setup->CpuManager.UnitedWorkers.CpuCount = unitedCpuCount;
+ setup->CpuManager.UnitedWorkers.SpinThresholdUs = DefaultSpinThreshold;
+ setup->CpuManager.UnitedWorkers.NoRealtime = DefaultNoRealtime;
+ if (preemption) {
+ setup->CpuManager.UnitedWorkers.PoolLimitUs = 500;
+ setup->CpuManager.UnitedWorkers.EventLimitUs = 100;
+ setup->CpuManager.UnitedWorkers.LimitPrecisionUs = 100;
+ } else {
+ setup->CpuManager.UnitedWorkers.PoolLimitUs = 100'000'000'000;
+ setup->CpuManager.UnitedWorkers.EventLimitUs = 10'000'000'000;
+ setup->CpuManager.UnitedWorkers.LimitPrecisionUs = 10'000'000'000;
+ }
+ setup->Scheduler = new TBasicSchedulerThread(NActors::TSchedulerConfig(512, 0));
+ return setup;
+ }
+
+ enum class EPoolType {
+ Basic,
+ United
+ };
+
+ THolder<TActorSystemSetup> InitActorSystemSetup(EPoolType poolType, ui32 poolsCount, ui32 threads, bool activateEveryEvent, bool preemption) {
+ if (poolType == EPoolType::Basic) {
+ THolder<TActorSystemSetup> setup = GetActorSystemSetup(0, false);
+ for (ui32 i = 0; i < poolsCount; ++i) {
+ AddBasicPool(setup, threads, activateEveryEvent);
+ }
+ return setup;
+ } else if (poolType == EPoolType::United) {
+ THolder<TActorSystemSetup> setup = GetActorSystemSetup(poolsCount * threads, preemption);
+ for (ui32 i = 0; i < poolsCount; ++i) {
+ AddUnitedPool(setup, threads, activateEveryEvent);
+ }
+ return setup;
+ }
+ Y_FAIL();
+ }
+
+ double BenchSendReceive(bool allocation, NActors::TMailboxType::EType mType, EPoolType poolType) {
+ THolder<TActorSystemSetup> setup = InitActorSystemSetup(poolType, 1, 1, false, false);
+ TActorSystem actorSystem(setup);
+ actorSystem.Start();
+
+ TThreadParkPad pad;
+ TAtomic actorsAlive = 0;
+ double elapsedTime = 0;
+ THolder<IActor> endActor{
+ new TTestEndDecorator(THolder(
+ new TSendReceiveActor(&elapsedTime, {}, allocation, Leader)), &pad, &actorsAlive)};
+
+ actorSystem.Register(endActor.Release(), mType);
+
+ pad.Park();
+ actorSystem.Stop();
+
+ return 1e9 * elapsedTime;
+ }
+
+ double BenchSendActivateReceive(ui32 poolsCount, ui32 threads, bool allocation, EPoolType poolType) {
+ THolder<TActorSystemSetup> setup = InitActorSystemSetup(poolType, poolsCount, threads, true, false);
+ TActorSystem actorSystem(setup);
+ actorSystem.Start();
+
+ TThreadParkPad pad;
+ TAtomic actorsAlive = 0;
+ double elapsedTime = 0;
+ ui32 followerPoolId = 0;
+
+ ui32 leaderPoolId = poolsCount == 1 ? 0 : 1;
+ TActorId followerId = actorSystem.Register(
+ new TSendReceiveActor(nullptr, {}, allocation, Follower), TMailboxType::HTSwap, followerPoolId);
+ THolder<IActor> leader{
+ new TTestEndDecorator(THolder(
+ new TSendReceiveActor(&elapsedTime, followerId, allocation, Leader)), &pad, &actorsAlive)};
+ actorSystem.Register(leader.Release(), TMailboxType::HTSwap, leaderPoolId);
+
+ pad.Park();
+ actorSystem.Stop();
+
+ return 1e9 * elapsedTime;
+ }
+
+ double BenchSendActivateReceiveWithMailboxNeighbours(ui32 MailboxNeighbourActors, EPoolType poolType) {
+ THolder<TActorSystemSetup> setup = InitActorSystemSetup(poolType, 1, 1, false, false);
+ TActorSystem actorSystem(setup);
+ actorSystem.Start();
+
+ TThreadParkPad pad;
+ TAtomic actorsAlive = 0;
+ double elapsedTime = 0;
+
+ TActorId followerId = actorSystem.Register(
+ new TSendReceiveActor(nullptr, {}, false, Follower, MailboxNeighbourActors), TMailboxType::HTSwap);
+ THolder<IActor> leader{
+ new TTestEndDecorator(THolder(
+ new TSendReceiveActor(&elapsedTime, followerId, false, Leader, MailboxNeighbourActors)), &pad, &actorsAlive)};
+ actorSystem.Register(leader.Release(), TMailboxType::HTSwap);
+
+ pad.Park();
+ actorSystem.Stop();
+
+ return 1e9 * elapsedTime;
+ }
+
+ double BenchContentedThreads(ui32 threads, ui32 actorsPairsCount, EPoolType poolType) {
+ THolder<TActorSystemSetup> setup = InitActorSystemSetup(poolType, 1, threads, true, false);
+ TActorSystem actorSystem(setup);
+ actorSystem.Start();
+
+ TThreadParkPad pad;
+ TAtomic actorsAlive = 0;
+ THPTimer Timer;
+
+ TVector<double> dummy(actorsPairsCount);
+ Timer.Reset();
+ for (ui32 i = 0; i < actorsPairsCount; ++i) {
+ ui32 followerPoolId = 0;
+ ui32 leaderPoolId = 0;
+ TActorId followerId = actorSystem.Register(
+ new TSendReceiveActor(nullptr, {}, true, Follower), TMailboxType::HTSwap, followerPoolId);
+ THolder<IActor> leader{
+ new TTestEndDecorator(THolder(
+ new TSendReceiveActor(&dummy[i], followerId, true, Leader)), &pad, &actorsAlive)};
+ actorSystem.Register(leader.Release(), TMailboxType::HTSwap, leaderPoolId);
+ }
+
+ pad.Park();
+ auto elapsedTime = Timer.Passed() / TotalEventsAmount;
+ actorSystem.Stop();
+
+ return 1e9 * elapsedTime;
+ }
+
+ auto Mean(const TVector<double>& data) {
+ return Accumulate(data.begin(), data.end(), 0.0) / data.size();
+ }
+
+ auto Deviation(const TVector<double>& data) {
+ auto mean = Mean(data);
+ double deviation = 0.0;
+ for (const auto& x : data) {
+ deviation += (x - mean) * (x - mean);
+ }
+ return std::sqrt(deviation / data.size());
+ }
+
+ struct TStats {
+ double Mean;
+ double Deviation;
+ TString ToString() {
+ return TStringBuilder() << Mean << " ± " << Deviation << " ns " << std::ceil(Deviation / Mean * 1000) / 10.0 << "%";
+ }
+ };
+
+ template <typename Func>
+ TStats CountStats(Func func, ui32 itersCount = 5) {
+ TVector<double> elapsedTimes;
+ for (ui32 i = 0; i < itersCount; ++i) {
+ auto elapsedTime = func();
+ elapsedTimes.push_back(elapsedTime);
+ }
+ return {Mean(elapsedTimes), Deviation(elapsedTimes)};
+ }
+
+ TVector<NActors::TMailboxType::EType> MailboxTypes = {
+ TMailboxType::Simple,
+ TMailboxType::Revolving,
+ TMailboxType::HTSwap,
+ TMailboxType::ReadAsFilled,
+ TMailboxType::TinyReadAsFilled
+ };
+
+ Y_UNIT_TEST(SendReceive1Pool1ThreadAlloc) {
+ for (const auto& mType : MailboxTypes) {
+ auto stats = CountStats([mType] {
+ return BenchSendReceive(true, mType, EPoolType::Basic);
+ });
+ Cerr << stats.ToString() << " " << mType << Endl;
+ }
+ }
+
+ Y_UNIT_TEST(SendReceive1Pool1ThreadAllocUnited) {
+ for (const auto& mType : MailboxTypes) {
+ auto stats = CountStats([mType] {
+ return BenchSendReceive(true, mType, EPoolType::United);
+ });
+ Cerr << stats.ToString() << " " << mType << Endl;
+ }
+ }
+
+ Y_UNIT_TEST(SendReceive1Pool1ThreadNoAlloc) {
+ for (const auto& mType : MailboxTypes) {
+ auto stats = CountStats([mType] {
+ return BenchSendReceive(false, mType, EPoolType::Basic);
+ });
+ Cerr << stats.ToString() << " " << mType << Endl;
+ }
+ }
+
+ Y_UNIT_TEST(SendReceive1Pool1ThreadNoAllocUnited) {
+ for (const auto& mType : MailboxTypes) {
+ auto stats = CountStats([mType] {
+ return BenchSendReceive(false, mType, EPoolType::United);
+ });
+ Cerr << stats.ToString() << " " << mType << Endl;
+ }
+ }
+
+ Y_UNIT_TEST(SendActivateReceive1Pool1ThreadAlloc) {
+ auto stats = CountStats([] {
+ return BenchSendActivateReceive(1, 1, true, EPoolType::Basic);
+ });
+ Cerr << stats.ToString() << Endl;
+ }
+
+ Y_UNIT_TEST(SendActivateReceive1Pool1ThreadAllocUnited) {
+ auto stats = CountStats([] {
+ return BenchSendActivateReceive(1, 1, true, EPoolType::United);
+ });
+ Cerr << stats.ToString() << Endl;
+ }
+
+ Y_UNIT_TEST(SendActivateReceive1Pool1ThreadNoAlloc) {
+ auto stats = CountStats([] {
+ return BenchSendActivateReceive(1, 1, false, EPoolType::Basic);
+ });
+ Cerr << stats.ToString() << Endl;
+ }
+
+ Y_UNIT_TEST(SendActivateReceive1Pool1ThreadNoAllocUnited) {
+ auto stats = CountStats([] {
+ return BenchSendActivateReceive(1, 1, false, EPoolType::United);
+ });
+ Cerr << stats.ToString() << Endl;
+ }
+
+ Y_UNIT_TEST(SendActivateReceive1Pool2ThreadsAlloc) {
+ auto stats = CountStats([] {
+ return BenchSendActivateReceive(1, 2, true, EPoolType::Basic);
+ });
+ Cerr << stats.ToString() << Endl;
+ }
+
+ Y_UNIT_TEST(SendActivateReceive1Pool2ThreadsAllocUnited) {
+ auto stats = CountStats([] {
+ return BenchSendActivateReceive(1, 2, true, EPoolType::United);
+ });
+ Cerr << stats.ToString() << Endl;
+ }
+
+ Y_UNIT_TEST(SendActivateReceive1Pool2ThreadsNoAlloc) {
+ auto stats = CountStats([] {
+ return BenchSendActivateReceive(1, 2, false, EPoolType::Basic);
+ });
+ Cerr << stats.ToString() << Endl;
+ }
+
+ Y_UNIT_TEST(SendActivateReceive1Pool2ThreadsNoAllocUnited) {
+ auto stats = CountStats([] {
+ return BenchSendActivateReceive(1, 2, false, EPoolType::United);
+ });
+ Cerr << stats.ToString() << Endl;
+ }
+
+ Y_UNIT_TEST(SendActivateReceive2Pool1ThreadAlloc) {
+ auto stats = CountStats([] {
+ return BenchSendActivateReceive(2, 1, true, EPoolType::Basic);
+ });
+ Cerr << stats.ToString() << Endl;
+ }
+
+ Y_UNIT_TEST(SendActivateReceive2Pool1ThreadAllocUnited) {
+ auto stats = CountStats([] {
+ return BenchSendActivateReceive(2, 1, true, EPoolType::United);
+ });
+ Cerr << stats.ToString() << Endl;
+ }
+
+ Y_UNIT_TEST(SendActivateReceive2Pool1ThreadNoAlloc) {
+ auto stats = CountStats([] {
+ return BenchSendActivateReceive(2, 1, false, EPoolType::Basic);
+ });
+ Cerr << stats.ToString() << Endl;
+ }
+
+ Y_UNIT_TEST(SendActivateReceive2Pool1ThreadNoAllocUnited) {
+ auto stats = CountStats([] {
+ return BenchSendActivateReceive(2, 1, false, EPoolType::United);
+ });
+ Cerr << stats.ToString() << Endl;
+ }
+
+ void RunBenchContentedThreads(ui32 threads, EPoolType poolType) {
+ for (ui32 actorPairs = 1; actorPairs <= 2 * threads; actorPairs++) {
+ auto stats = CountStats([threads, actorPairs, poolType] {
+ return BenchContentedThreads(threads, actorPairs, poolType);
+ });
+ Cerr << stats.ToString() << " actorPairs: " << actorPairs << Endl;
+ }
+ }
+
+ Y_UNIT_TEST(SendActivateReceive1Pool1Threads) { RunBenchContentedThreads(1, EPoolType::Basic); }
+ Y_UNIT_TEST(SendActivateReceive1Pool1ThreadsUnited) { RunBenchContentedThreads(1, EPoolType::United); }
+ Y_UNIT_TEST(SendActivateReceive1Pool2Threads) { RunBenchContentedThreads(2, EPoolType::Basic); }
+ Y_UNIT_TEST(SendActivateReceive1Pool2ThreadsUnited) { RunBenchContentedThreads(2, EPoolType::United); }
+ Y_UNIT_TEST(SendActivateReceive1Pool3Threads) { RunBenchContentedThreads(3, EPoolType::Basic); }
+ Y_UNIT_TEST(SendActivateReceive1Pool3ThreadsUnited) { RunBenchContentedThreads(3, EPoolType::United); }
+ Y_UNIT_TEST(SendActivateReceive1Pool4Threads) { RunBenchContentedThreads(4, EPoolType::Basic); }
+ Y_UNIT_TEST(SendActivateReceive1Pool4ThreadsUnited) { RunBenchContentedThreads(4, EPoolType::United); }
+ Y_UNIT_TEST(SendActivateReceive1Pool5Threads) { RunBenchContentedThreads(5, EPoolType::Basic); }
+ Y_UNIT_TEST(SendActivateReceive1Pool5ThreadsUnited) { RunBenchContentedThreads(5, EPoolType::United); }
+ Y_UNIT_TEST(SendActivateReceive1Pool6Threads) { RunBenchContentedThreads(6, EPoolType::Basic); }
+ Y_UNIT_TEST(SendActivateReceive1Pool6ThreadsUnited) { RunBenchContentedThreads(6, EPoolType::United); }
+ Y_UNIT_TEST(SendActivateReceive1Pool7Threads) { RunBenchContentedThreads(7, EPoolType::Basic); }
+ Y_UNIT_TEST(SendActivateReceive1Pool7ThreadsUnited) { RunBenchContentedThreads(7, EPoolType::United); }
+ Y_UNIT_TEST(SendActivateReceive1Pool8Threads) { RunBenchContentedThreads(8, EPoolType::Basic); }
+ Y_UNIT_TEST(SendActivateReceive1Pool8ThreadsUnited) { RunBenchContentedThreads(8, EPoolType::United); }
+
+ Y_UNIT_TEST(SendActivateReceiveWithMailboxNeighbours) {
+ TVector<ui32> NeighbourActors = {0, 1, 2, 3, 4, 5, 6, 7, 8, 16, 32, 64, 128, 256};
+ for (const auto& neighbour : NeighbourActors) {
+ auto stats = CountStats([neighbour] {
+ return BenchSendActivateReceiveWithMailboxNeighbours(neighbour, EPoolType::Basic);
+ });
+ Cerr << stats.ToString() << " neighbourActors: " << neighbour << Endl;
+ }
+ }
+
+ Y_UNIT_TEST(SendActivateReceiveWithMailboxNeighboursUnited) {
+ TVector<ui32> NeighbourActors = {0, 1, 2, 3, 4, 5, 6, 7, 8, 16, 32, 64, 128, 256};
+ for (const auto& neighbour : NeighbourActors) {
+ auto stats = CountStats([neighbour] {
+ return BenchSendActivateReceiveWithMailboxNeighbours(neighbour, EPoolType::United);
+ });
+ Cerr << stats.ToString() << " neighbourActors: " << neighbour << Endl;
+ }
+ }
+}
+
+Y_UNIT_TEST_SUITE(TestDecorator) {
+ struct TPingDecorator : TDecorator {
+ TAutoPtr<IEventHandle> SavedEvent = nullptr;
+ ui64* Counter;
+
+ TPingDecorator(THolder<IActor>&& actor, ui64* counter)
+ : TDecorator(std::move(actor))
+ , Counter(counter)
+ {
+ }
+
+ bool DoBeforeReceiving(TAutoPtr<IEventHandle>& ev, const TActorContext& ctx) override {
+ *Counter += 1;
+ if (ev->Type != TEvents::THelloWorld::Pong) {
+ TAutoPtr<IEventHandle> pingEv = new IEventHandle(SelfId(), SelfId(), new TEvents::TEvPing());
+ SavedEvent = ev;
+ Actor->Receive(pingEv, ctx);
+ } else {
+ Actor->Receive(SavedEvent, ctx);
+ }
+ return false;
+ }
+ };
+
+ struct TPongDecorator : TDecorator {
+ ui64* Counter;
+
+ TPongDecorator(THolder<IActor>&& actor, ui64* counter)
+ : TDecorator(std::move(actor))
+ , Counter(counter)
+ {
+ }
+
+ bool DoBeforeReceiving(TAutoPtr<IEventHandle>& ev, const TActorContext&) override {
+ *Counter += 1;
+ if (ev->Type == TEvents::THelloWorld::Ping) {
+ Send(SelfId(), new TEvents::TEvPong());
+ return false;
+ }
+ return true;
+ }
+ };
+
+ struct TTestActor : TActorBootstrapped<TTestActor> {
+ static constexpr char ActorName[] = "TestActor";
+
+ void Bootstrap()
+ {
+ const auto& activityTypeIndex = GetActivityType();
+ Y_ENSURE(activityTypeIndex < GetActivityTypeCount());
+ Y_ENSURE(GetActivityTypeName(activityTypeIndex) == "TestActor");
+ PassAway();
+ }
+ };
+
+ Y_UNIT_TEST(Basic) {
+ THolder<TActorSystemSetup> setup = MakeHolder<TActorSystemSetup>();
+ setup->NodeId = 0;
+ setup->ExecutorsCount = 1;
+ setup->Executors.Reset(new TAutoPtr<IExecutorPool>[setup->ExecutorsCount]);
+ for (ui32 i = 0; i < setup->ExecutorsCount; ++i) {
+ setup->Executors[i] = new TBasicExecutorPool(i, 1, 10, "basic");
+ }
+ setup->Scheduler = new TBasicSchedulerThread;
+
+ TActorSystem actorSystem(setup);
+ actorSystem.Start();
+
+ THolder<IActor> innerActor = MakeHolder<TTestActor>();
+ ui64 pongCounter = 0;
+ THolder<IActor> pongActor = MakeHolder<TPongDecorator>(std::move(innerActor), &pongCounter);
+ ui64 pingCounter = 0;
+ THolder<IActor> pingActor = MakeHolder<TPingDecorator>(std::move(pongActor), &pingCounter);
+
+ TThreadParkPad pad;
+ TAtomic actorsAlive = 0;
+
+ THolder<IActor> endActor = MakeHolder<TTestEndDecorator>(std::move(pingActor), &pad, &actorsAlive);
+ actorSystem.Register(endActor.Release(), TMailboxType::HTSwap);
+
+ pad.Park();
+ actorSystem.Stop();
+ UNIT_ASSERT(pongCounter == 2 && pingCounter == 2);
+ }
+
+ Y_UNIT_TEST(LocalProcessKey) {
+ static constexpr char ActorName[] = "TestActor";
+
+ UNIT_ASSERT((TEnumProcessKey<TActorActivityTag, IActor::EActorActivity>::GetName(IActor::INTERCONNECT_PROXY_TCP) == "INTERCONNECT_PROXY_TCP"));
+
+ UNIT_ASSERT((TLocalProcessKey<TActorActivityTag, ActorName>::GetName() == ActorName));
+ UNIT_ASSERT((TEnumProcessKey<TActorActivityTag, IActor::EActorActivity>::GetIndex(IActor::INTERCONNECT_PROXY_TCP) == IActor::INTERCONNECT_PROXY_TCP));
+ }
+}
diff --git a/library/cpp/actors/core/actorid.cpp b/library/cpp/actors/core/actorid.cpp
new file mode 100644
index 0000000000..ccda035eac
--- /dev/null
+++ b/library/cpp/actors/core/actorid.cpp
@@ -0,0 +1,34 @@
+#include "actorid.h"
+#include <util/string/builder.h>
+#include <util/string/cast.h>
+
+namespace NActors {
+ void TActorId::Out(IOutputStream& o) const {
+ o << "[" << NodeId() << ":" << LocalId() << ":" << Hint() << "]";
+ }
+
+ TString TActorId::ToString() const {
+ TString x;
+ TStringOutput o(x);
+ Out(o);
+ return x;
+ }
+
+ bool TActorId::Parse(const char* buf, ui32 sz) {
+ if (sz < 4 || buf[0] != '[' || buf[sz - 1] != ']')
+ return false;
+
+ size_t colons[2]; // positions of the two ':' separators
+ TStringBuf str(buf, sz);
+ colons[0] = str.find(':', 1);
+ if (colons[0] == TStringBuf::npos)
+ return false;
+ colons[1] = str.find(':', colons[0] + 1);
+ if (colons[1] == TStringBuf::npos)
+ return false;
+
+ bool success = TryFromString(buf + 1, colons[0] - 1, Raw.N.NodeId)
+ && TryFromString(buf + colons[0] + 1, colons[1] - colons[0] - 1, Raw.N.LocalId)
+ && TryFromString(buf + colons[1] + 1, sz - colons[1] - 2, Raw.N.Hint);
+
+ return success;
+ }
+}
diff --git a/library/cpp/actors/core/actorid.h b/library/cpp/actors/core/actorid.h
new file mode 100644
index 0000000000..d972b1a0ff
--- /dev/null
+++ b/library/cpp/actors/core/actorid.h
@@ -0,0 +1,196 @@
+#pragma once
+
+#include "defs.h"
+#include <util/stream/output.h> // for IOutputStream
+#include <util/generic/hash.h>
+
+namespace NActors {
+ // used as the globally unique address of an actor
+ // can also transport a service id (a string of up to 12 bytes placed in the hint+localid fields)
+ // bit layout of the node field:
+ // highest 1 bit - service id marker
+ // next 11 bits - pool id
+ // lowest 20 bits - the node id itself
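+ //
+ // Example: nodeId 5 in pool 3 is encoded in the node field as (3 << PoolIndexShift) | 5.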
+
+ struct TActorId {
+ static constexpr ui32 MaxServiceIDLength = 12;
+ static constexpr ui32 MaxPoolID = 0x000007FF;
+ static constexpr ui32 MaxNodeId = 0x000FFFFF;
+ static constexpr ui32 PoolIndexShift = 20;
+ static constexpr ui32 PoolIndexMask = MaxPoolID << PoolIndexShift;
+ static constexpr ui32 ServiceMask = 0x80000000;
+ static constexpr ui32 NodeIdMask = MaxNodeId;
+
+ private:
+ union {
+ struct {
+ ui64 LocalId;
+ ui32 Hint;
+ ui32 NodeId;
+ } N;
+
+ struct {
+ ui64 X1;
+ ui64 X2;
+ } X;
+
+ ui8 Buf[16];
+ } Raw;
+
+ public:
+ TActorId() noexcept {
+ Raw.X.X1 = 0;
+ Raw.X.X2 = 0;
+ }
+
+ explicit TActorId(ui32 nodeId, ui32 poolId, ui64 localId, ui32 hint) noexcept {
+ Y_VERIFY_DEBUG(poolId <= MaxPoolID);
+ Raw.N.LocalId = localId;
+ Raw.N.Hint = hint;
+ Raw.N.NodeId = nodeId | (poolId << PoolIndexShift);
+ }
+
+ explicit TActorId(ui32 nodeId, const TStringBuf& x) noexcept {
+ Y_VERIFY(x.size() <= MaxServiceIDLength, "service id is too long");
+ Raw.N.LocalId = 0;
+ Raw.N.Hint = 0;
+ Raw.N.NodeId = nodeId | ServiceMask;
+ memcpy(Raw.Buf, x.data(), x.size());
+ }
+
+ explicit TActorId(ui64 x1, ui64 x2) noexcept {
+ Raw.X.X1 = x1;
+ Raw.X.X2 = x2;
+ }
+
+ explicit operator bool() const noexcept {
+ return Raw.X.X1 != 0 || Raw.X.X2 != 0;
+ }
+
+ ui64 LocalId() const noexcept {
+ return Raw.N.LocalId;
+ }
+
+ ui32 Hint() const noexcept {
+ return Raw.N.Hint;
+ }
+
+ ui32 NodeId() const noexcept {
+ return Raw.N.NodeId & NodeIdMask;
+ }
+
+ bool IsService() const noexcept {
+ return (Raw.N.NodeId & ServiceMask);
+ }
+
+ TStringBuf ServiceId() const noexcept {
+ Y_VERIFY_DEBUG(IsService());
+ return TStringBuf((const char*)Raw.Buf, MaxServiceIDLength);
+ }
+
+ static ui32 PoolIndex(ui32 nodeid) noexcept {
+ return ((nodeid & PoolIndexMask) >> PoolIndexShift);
+ }
+
+ ui32 PoolID() const noexcept {
+ return PoolIndex(Raw.N.NodeId);
+ }
+
+ ui64 RawX1() const noexcept {
+ return Raw.X.X1;
+ }
+
+ ui64 RawX2() const noexcept {
+ return Raw.X.X2;
+ }
+
+ bool operator<(const TActorId& x) const noexcept {
+ const ui64 s1 = Raw.X.X1;
+ const ui64 s2 = Raw.X.X2;
+ const ui64 x1 = x.Raw.X.X1;
+ const ui64 x2 = x.Raw.X.X2;
+
+ return (s1 != x1) ? (s1 < x1) : (s2 < x2);
+ }
+
+ bool operator!=(const TActorId& x) const noexcept {
+ return Raw.X.X1 != x.Raw.X.X1 || Raw.X.X2 != x.Raw.X.X2;
+ }
+
+ bool operator==(const TActorId& x) const noexcept {
+ return !(x != *this);
+ }
+
+ ui64 Hash() const noexcept {
+ const ui32* x = (const ui32*)Raw.Buf;
+
+ const ui64 x1 = x[0] * 0x001DFF3D8DC48F5Dull;
+ const ui64 x2 = x[1] * 0x179CA10C9242235Dull;
+ const ui64 x3 = x[2] * 0x0F530CAD458B0FB1ull;
+ const ui64 x4 = x[3] * 0xB5026F5AA96619E9ull;
+
+ const ui64 z1 = x1 + x2;
+ const ui64 z2 = x3 + x4;
+
+ const ui64 sum = 0x5851F42D4C957F2D + z1 + z2;
+
+ return (sum >> 32) | (sum << 32);
+ }
+
+ ui32 Hash32() const noexcept {
+ const ui32* x = (const ui32*)Raw.Buf;
+
+ const ui64 x1 = x[0] * 0x001DFF3D8DC48F5Dull;
+ const ui64 x2 = x[1] * 0x179CA10C9242235Dull;
+ const ui64 x3 = x[2] * 0x0F530CAD458B0FB1ull;
+ const ui64 x4 = x[3] * 0xB5026F5AA96619E9ull;
+
+ const ui64 z1 = x1 + x2;
+ const ui64 z2 = x3 + x4;
+
+ const ui64 sum = 0x5851F42D4C957F2D + z1 + z2;
+
+ return sum >> 32;
+ }
+
+ struct THash {
+ ui64 operator()(const TActorId& actorId) const noexcept {
+ return actorId.Hash();
+ }
+ };
+
+ struct THash32 {
+ ui64 operator()(const TActorId& actorId) const noexcept {
+ return actorId.Hash();
+ }
+ };
+
+ struct TOrderedCmp {
+ bool operator()(const TActorId &left, const TActorId &right) const noexcept {
+ Y_VERIFY_DEBUG(!left.IsService() && !right.IsService(), "ordered compare works for plain actorids only");
+ const ui32 n1 = left.NodeId();
+ const ui32 n2 = right.NodeId();
+
+ return (n1 != n2) ? (n1 < n2) : left.LocalId() < right.LocalId();
+ }
+ };
+
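+ // The serialized form produced by Out()/ToString() and accepted by Parse() is "[NodeId:LocalId:Hint]",
+ // e.g. TActorId(5, 0, 17, 0).ToString() == "[5:17:0]".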
+ TString ToString() const;
+ void Out(IOutputStream& o) const;
+ bool Parse(const char* buf, ui32 sz);
+ };
+
+ static_assert(sizeof(TActorId) == 16, "expect sizeof(TActorId) == 16");
+ static_assert(MaxPools < TActorId::MaxPoolID); // the current united-pool implementation limits pool ids to MaxPools
+}
+
+template <>
+inline void Out<NActors::TActorId>(IOutputStream& o, const NActors::TActorId& x) {
+ return x.Out(o);
+}
+
+template <>
+struct THash<NActors::TActorId> {
+ inline ui64 operator()(const NActors::TActorId& x) const {
+ return x.Hash();
+ }
+};
diff --git a/library/cpp/actors/core/actorsystem.cpp b/library/cpp/actors/core/actorsystem.cpp
new file mode 100644
index 0000000000..c58698a206
--- /dev/null
+++ b/library/cpp/actors/core/actorsystem.cpp
@@ -0,0 +1,277 @@
+#include "defs.h"
+#include "actorsystem.h"
+#include "callstack.h"
+#include "cpu_manager.h"
+#include "mailbox.h"
+#include "events.h"
+#include "interconnect.h"
+#include "servicemap.h"
+#include "scheduler_queue.h"
+#include "scheduler_actor.h"
+#include "log.h"
+#include "probes.h"
+#include "ask.h"
+#include <library/cpp/actors/util/affinity.h>
+#include <library/cpp/actors/util/datetime.h>
+#include <util/generic/hash.h>
+#include <util/system/rwlock.h>
+#include <util/random/random.h>
+
+namespace NActors {
+ LWTRACE_USING(ACTORLIB_PROVIDER);
+
+ struct TActorSystem::TServiceMap : TNonCopyable {
+ NActors::TServiceMap<TActorId, TActorId, TActorId::THash> LocalMap;
+ TTicketLock Lock;
+
+ TActorId RegisterLocalService(const TActorId& serviceId, const TActorId& actorId) {
+ TTicketLock::TGuard guard(&Lock);
+ const TActorId old = LocalMap.Update(serviceId, actorId);
+ return old;
+ }
+
+ TActorId LookupLocal(const TActorId& x) {
+ return LocalMap.Find(x);
+ }
+ };
+
+ TActorSystem::TActorSystem(THolder<TActorSystemSetup>& setup, void* appData,
+ TIntrusivePtr<NLog::TSettings> loggerSettings)
+ : NodeId(setup->NodeId)
+ , CpuManager(new TCpuManager(setup))
+ , ExecutorPoolCount(CpuManager->GetExecutorsCount())
+ , Scheduler(setup->Scheduler)
+ , InterconnectCount((ui32)setup->Interconnect.ProxyActors.size())
+ , CurrentTimestamp(0)
+ , CurrentMonotonic(0)
+ , CurrentIDCounter(RandomNumber<ui64>())
+ , SystemSetup(setup.Release())
+ , DefSelfID(NodeId, "actorsystem")
+ , AppData0(appData)
+ , LoggerSettings0(loggerSettings)
+ , StartExecuted(false)
+ , StopExecuted(false)
+ , CleanupExecuted(false)
+ {
+ ServiceMap.Reset(new TServiceMap());
+ }
+
+ TActorSystem::~TActorSystem() {
+ Cleanup();
+ }
+
+ bool TActorSystem::Send(TAutoPtr<IEventHandle> ev) const {
+ if (Y_UNLIKELY(!ev))
+ return false;
+
+#ifdef USE_ACTOR_CALLSTACK
+ ev->Callstack.TraceIfEmpty();
+#endif
+
+ TActorId recipient = ev->GetRecipientRewrite();
+ const ui32 recpNodeId = recipient.NodeId();
+
+ if (recpNodeId != NodeId && recpNodeId != 0) {
+ // if the recipient is not a local one, rewrite with a forward instruction
+ Y_VERIFY_DEBUG(!ev->HasEvent() || ev->GetBase()->IsSerializable());
+ Y_VERIFY(ev->Recipient == recipient,
+ "Event rewrite from %s to %s would be lost via interconnect",
+ ev->Recipient.ToString().c_str(),
+ recipient.ToString().c_str());
+ recipient = InterconnectProxy(recpNodeId);
+ ev->Rewrite(TEvInterconnect::EvForward, recipient);
+ }
+ if (recipient.IsService()) {
+ TActorId target = ServiceMap->LookupLocal(recipient);
+ if (!target && IsInterconnectProxyId(recipient) && ProxyWrapperFactory) {
+ const TActorId actorId = ProxyWrapperFactory(const_cast<TActorSystem*>(this),
+ GetInterconnectProxyNode(recipient));
+ with_lock(ProxyCreationLock) {
+ target = ServiceMap->LookupLocal(recipient);
+ if (!target) {
+ target = actorId;
+ ServiceMap->RegisterLocalService(recipient, target);
+ }
+ }
+ if (target != actorId) {
+ // a race has occurred; terminate the newly created actor
+ Send(new IEventHandle(TEvents::TSystem::Poison, 0, actorId, {}, nullptr, 0));
+ }
+ }
+ recipient = target;
+ ev->Rewrite(ev->GetTypeRewrite(), recipient);
+ }
+
+ Y_VERIFY_DEBUG(recipient == ev->GetRecipientRewrite());
+ const ui32 recpPool = recipient.PoolID();
+ if (recipient && recpPool < ExecutorPoolCount) {
+ if (CpuManager->GetExecutorPool(recpPool)->Send(ev)) {
+ return true;
+ }
+ }
+
+ Send(ev->ForwardOnNondelivery(TEvents::TEvUndelivered::ReasonActorUnknown));
+ return false;
+ }
+
+ bool TActorSystem::Send(const TActorId& recipient, IEventBase* ev, ui32 flags) const {
+ return this->Send(new IEventHandle(recipient, DefSelfID, ev, flags));
+ }
+
+ void TActorSystem::Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie) const {
+ Schedule(deadline - Timestamp(), ev, cookie);
+ }
+
+ void TActorSystem::Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie) const {
+ const auto current = Monotonic();
+ if (deadline < current)
+ deadline = current;
+
+ TTicketLock::TGuard guard(&ScheduleLock);
+ ScheduleQueue->Writer.Push(deadline.MicroSeconds(), ev.Release(), cookie);
+ }
+
+ void TActorSystem::Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie) const {
+ const auto deadline = Monotonic() + delta;
+
+ TTicketLock::TGuard guard(&ScheduleLock);
+ ScheduleQueue->Writer.Push(deadline.MicroSeconds(), ev.Release(), cookie);
+ }
+
+ TActorId TActorSystem::Register(IActor* actor, TMailboxType::EType mailboxType, ui32 executorPool, ui64 revolvingCounter,
+ const TActorId& parentId) {
+ Y_VERIFY(executorPool < ExecutorPoolCount, "executorPool# %" PRIu32 ", ExecutorPoolCount# %" PRIu32,
+ (ui32)executorPool, (ui32)ExecutorPoolCount);
+ return CpuManager->GetExecutorPool(executorPool)->Register(actor, mailboxType, revolvingCounter, parentId);
+ }
+
+ NThreading::TFuture<THolder<IEventBase>> TActorSystem::AskGeneric(TMaybe<ui32> expectedEventType,
+ TActorId recipient, THolder<IEventBase> event,
+ TDuration timeout) {
+ auto promise = NThreading::NewPromise<THolder<IEventBase>>();
+ Register(MakeAskActor(expectedEventType, recipient, std::move(event), timeout, promise).Release());
+ return promise.GetFuture();
+ }
+
+ ui64 TActorSystem::AllocateIDSpace(ui64 count) {
+ Y_VERIFY_DEBUG(count < Max<ui32>() / 65536);
+
+ static_assert(sizeof(TAtomic) == sizeof(ui64), "expect sizeof(TAtomic) == sizeof(ui64)");
+
+ // get the high 32 bits as seconds from epoch;
+ // this could wrap about once a century, but no actor reference is expected to live that long, so the wrap is harmless
+ const ui64 timeFromEpoch = TInstant::MicroSeconds(RelaxedLoad(&CurrentTimestamp)).Seconds();
+
+ // get low 32 bits as counter value
+ ui32 lowPartEnd = (ui32)(AtomicAdd(CurrentIDCounter, count));
+ while (lowPartEnd < count) // if our request crosses 32bit boundary - retry
+ lowPartEnd = (ui32)(AtomicAdd(CurrentIDCounter, count));
+
+ const ui64 lowPart = lowPartEnd - count;
+ const ui64 ret = (timeFromEpoch << 32) | lowPart;
+
+ return ret;
+ }
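+
+ // Example: with the timestamp at T seconds and the counter at C, AllocateIDSpace(N)
+ // returns (T << 32) | C and reserves the N ids up to (T << 32) | (C + N).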
+
+ TActorId TActorSystem::InterconnectProxy(ui32 destinationNode) const {
+ if (destinationNode < InterconnectCount)
+ return Interconnect[destinationNode];
+ else if (destinationNode != NodeId)
+ return MakeInterconnectProxyId(destinationNode);
+ else
+ return TActorId();
+ }
+
+ ui32 TActorSystem::BroadcastToProxies(const std::function<IEventHandle*(const TActorId&)>& eventFactory) {
+ // TODO: get rid of this method
+ for (ui32 i = 0; i < InterconnectCount; ++i) {
+ Send(eventFactory(Interconnect[i]));
+ }
+ return InterconnectCount;
+ }
+
+ TActorId TActorSystem::LookupLocalService(const TActorId& x) const {
+ return ServiceMap->LookupLocal(x);
+ }
+
+ TActorId TActorSystem::RegisterLocalService(const TActorId& serviceId, const TActorId& actorId) {
+ // TODO: notify old actor about demotion
+ return ServiceMap->RegisterLocalService(serviceId, actorId);
+ }
+
+ void TActorSystem::GetPoolStats(ui32 poolId, TExecutorPoolStats& poolStats, TVector<TExecutorThreadStats>& statsCopy) const {
+ CpuManager->GetPoolStats(poolId, poolStats, statsCopy);
+ }
+
+ void TActorSystem::Start() {
+ Y_VERIFY(StartExecuted == false);
+ StartExecuted = true;
+
+ ScheduleQueue.Reset(new NSchedulerQueue::TQueueType());
+ TVector<NSchedulerQueue::TReader*> scheduleReaders;
+ scheduleReaders.push_back(&ScheduleQueue->Reader);
+ CpuManager->PrepareStart(scheduleReaders, this);
+ Scheduler->Prepare(this, &CurrentTimestamp, &CurrentMonotonic);
+ Scheduler->PrepareSchedules(&scheduleReaders.front(), (ui32)scheduleReaders.size());
+
+ // setup interconnect proxies
+ {
+ const TInterconnectSetup& setup = SystemSetup->Interconnect;
+ Interconnect.Reset(new TActorId[InterconnectCount + 1]);
+ for (ui32 i = 0, e = InterconnectCount; i != e; ++i) {
+ const TActorSetupCmd& x = setup.ProxyActors[i];
+ if (x.Actor) {
+ Interconnect[i] = Register(x.Actor, x.MailboxType, x.PoolId, i);
+ Y_VERIFY(!!Interconnect[i]);
+ }
+ }
+ ProxyWrapperFactory = std::move(SystemSetup->Interconnect.ProxyWrapperFactory);
+ }
+
+ // setup local services
+ {
+ for (ui32 i = 0, e = (ui32)SystemSetup->LocalServices.size(); i != e; ++i) {
+ const std::pair<TActorId, TActorSetupCmd>& x = SystemSetup->LocalServices[i];
+ const TActorId xid = Register(x.second.Actor, x.second.MailboxType, x.second.PoolId, i);
+ Y_VERIFY(!!xid);
+ if (!!x.first)
+ RegisterLocalService(x.first, xid);
+ }
+ }
+
+ // ok, setup complete, we could destroy setup config
+ SystemSetup.Destroy();
+
+ Scheduler->PrepareStart();
+ CpuManager->Start();
+ Send(MakeSchedulerActorId(), new TEvSchedulerInitialize(scheduleReaders, &CurrentTimestamp, &CurrentMonotonic));
+ Scheduler->Start();
+ }
+
+ void TActorSystem::Stop() {
+ if (StopExecuted || !StartExecuted)
+ return;
+
+ StopExecuted = true;
+
+ for (auto&& fn : std::exchange(DeferredPreStop, {})) {
+ fn();
+ }
+
+ Scheduler->PrepareStop();
+ CpuManager->PrepareStop();
+ Scheduler->Stop();
+ CpuManager->Shutdown();
+ }
+
+ void TActorSystem::Cleanup() {
+ Stop();
+ if (CleanupExecuted || !StartExecuted)
+ return;
+ CleanupExecuted = true;
+ CpuManager->Cleanup();
+ Scheduler.Destroy();
+ }
+
+ ui32 TActorSystem::MemProfActivityBase;
+}
diff --git a/library/cpp/actors/core/actorsystem.h b/library/cpp/actors/core/actorsystem.h
new file mode 100644
index 0000000000..40499d7586
--- /dev/null
+++ b/library/cpp/actors/core/actorsystem.h
@@ -0,0 +1,367 @@
+#pragma once
+
+#include "defs.h"
+
+#include "actor.h"
+#include "balancer.h"
+#include "config.h"
+#include "event.h"
+#include "log_settings.h"
+#include "scheduler_cookie.h"
+#include "mon_stats.h"
+
+#include <library/cpp/threading/future/future.h>
+#include <library/cpp/actors/util/ticket_lock.h>
+
+#include <util/generic/vector.h>
+#include <util/datetime/base.h>
+#include <util/system/mutex.h>
+
+namespace NActors {
+ class TActorSystem;
+ class TCpuManager;
+ class IExecutorPool;
+ struct TWorkerContext;
+
+ inline TActorId MakeInterconnectProxyId(ui32 destNodeId) {
+ char data[12];
+ memcpy(data, "ICProxy@", 8);
+ memcpy(data + 8, &destNodeId, sizeof(ui32));
+ return TActorId(0, TStringBuf(data, 12));
+ }
+
+ inline bool IsInterconnectProxyId(const TActorId& actorId) {
+ return actorId.IsService() && !memcmp(actorId.ServiceId().data(), "ICProxy@", 8);
+ }
+
+ inline ui32 GetInterconnectProxyNode(const TActorId& actorId) {
+ ui32 nodeId;
+ memcpy(&nodeId, actorId.ServiceId().data() + 8, sizeof(ui32));
+ return nodeId;
+ }
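+
+ // Example: the round trip GetInterconnectProxyNode(MakeInterconnectProxyId(42)) == 42 holds by construction.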
+
+ namespace NSchedulerQueue {
+ class TReader;
+ struct TQueueType;
+ }
+
+ class IExecutorPool : TNonCopyable {
+ public:
+ const ui32 PoolId;
+
+ TAtomic ActorRegistrations;
+ TAtomic DestroyedActors;
+
+ IExecutorPool(ui32 poolId)
+ : PoolId(poolId)
+ , ActorRegistrations(0)
+ , DestroyedActors(0)
+ {
+ }
+
+ virtual ~IExecutorPool() {
+ }
+
+ // for workers
+ virtual ui32 GetReadyActivation(TWorkerContext& wctx, ui64 revolvingCounter) = 0;
+ virtual void ReclaimMailbox(TMailboxType::EType mailboxType, ui32 hint, TWorkerId workerId, ui64 revolvingCounter) = 0;
+
+ /**
+ * Schedule a one-shot event that will be sent at the given time point in the future.
+ *
+ * @param deadline the wallclock time point in the future when the event must be sent
+ * @param ev the event to send
+ * @param cookie cookie that will be piggybacked with the event
+ * @param workerId index of the thread which will perform event dispatching
+ */
+ virtual void Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) = 0;
+
+ /**
+ * Schedule a one-shot event that will be sent at the given time point in the future.
+ *
+ * @param deadline the monotonic time point in the future when the event must be sent
+ * @param ev the event to send
+ * @param cookie cookie that will be piggybacked with the event
+ * @param workerId index of the thread which will perform event dispatching
+ */
+ virtual void Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) = 0;
+
+ /**
+ * Schedule a one-shot event that will be sent after the given delay.
+ *
+ * @param delta the time from now to delay event sending
+ * @param ev the event to send
+ * @param cookie cookie that will be piggybacked with the event
+ * @param workerId index of the thread which will perform event dispatching
+ */
+ virtual void Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) = 0;
+
+ // for actorsystem
+ virtual bool Send(TAutoPtr<IEventHandle>& ev) = 0;
+ virtual void ScheduleActivation(ui32 activation) = 0;
+ virtual void ScheduleActivationEx(ui32 activation, ui64 revolvingCounter) = 0;
+ virtual TActorId Register(IActor* actor, TMailboxType::EType mailboxType, ui64 revolvingCounter, const TActorId& parentId) = 0;
+ virtual TActorId Register(IActor* actor, TMailboxHeader* mailbox, ui32 hint, const TActorId& parentId) = 0;
+
+ // lifecycle stuff
+ virtual void Prepare(TActorSystem* actorSystem, NSchedulerQueue::TReader** scheduleReaders, ui32* scheduleSz) = 0;
+ virtual void Start() = 0;
+ virtual void PrepareStop() = 0;
+ virtual void Shutdown() = 0;
+ virtual bool Cleanup() = 0;
+
+ virtual void GetCurrentStats(TExecutorPoolStats& poolStats, TVector<TExecutorThreadStats>& statsCopy) const {
+ // TODO: make pure virtual and override everywhere
+ Y_UNUSED(poolStats);
+ Y_UNUSED(statsCopy);
+ }
+
+ virtual TString GetName() const {
+ return TString();
+ }
+
+ virtual ui32 GetThreads() const {
+ return 1;
+ }
+
+ // generic
+ virtual TAffinity* Affinity() const = 0;
+
+ virtual void SetRealTimeMode() const {}
+ };
+
+ // may be a proxy to in-pool schedulers (for NUMA-aware executors)
+ class ISchedulerThread : TNonCopyable {
+ public:
+ virtual ~ISchedulerThread() {
+ }
+
+ virtual void Prepare(TActorSystem* actorSystem, volatile ui64* currentTimestamp, volatile ui64* currentMonotonic) = 0;
+ virtual void PrepareSchedules(NSchedulerQueue::TReader** readers, ui32 scheduleReadersCount) = 0;
+ virtual void PrepareStart() { /* empty */ }
+ virtual void Start() = 0;
+ virtual void PrepareStop() = 0;
+ virtual void Stop() = 0;
+ };
+
+ struct TActorSetupCmd {
+ TMailboxType::EType MailboxType;
+ ui32 PoolId;
+ IActor* Actor;
+
+ TActorSetupCmd()
+ : MailboxType(TMailboxType::HTSwap)
+ , PoolId(0)
+ , Actor(nullptr)
+ {
+ }
+
+ TActorSetupCmd(IActor* actor, TMailboxType::EType mailboxType, ui32 poolId)
+ : MailboxType(mailboxType)
+ , PoolId(poolId)
+ , Actor(actor)
+ {
+ }
+
+ void Set(IActor* actor, TMailboxType::EType mailboxType, ui32 poolId) {
+ MailboxType = mailboxType;
+ PoolId = poolId;
+ Actor = actor;
+ }
+ };
+
+ using TProxyWrapperFactory = std::function<TActorId(TActorSystem*, ui32)>;
+
+ struct TInterconnectSetup {
+ TVector<TActorSetupCmd> ProxyActors;
+ TProxyWrapperFactory ProxyWrapperFactory;
+ };
+
+ struct TActorSystemSetup {
+ ui32 NodeId = 0;
+
+ // Either Executors or CpuManager must be initialized
+ ui32 ExecutorsCount = 0;
+ TArrayHolder<TAutoPtr<IExecutorPool>> Executors;
+
+ TAutoPtr<IBalancer> Balancer; // main implementation will be implicitly created if not set
+
+ TCpuManagerConfig CpuManager;
+
+ TAutoPtr<ISchedulerThread> Scheduler;
+ ui32 MaxActivityType = 5; // for default entries
+
+ TInterconnectSetup Interconnect;
+
+ using TLocalServices = TVector<std::pair<TActorId, TActorSetupCmd>>;
+ TLocalServices LocalServices;
+
+ ui32 GetExecutorsCount() const {
+ return Executors ? ExecutorsCount : CpuManager.GetExecutorsCount();
+ }
+
+ TString GetPoolName(ui32 poolId) const {
+ return Executors ? Executors[poolId]->GetName() : CpuManager.GetPoolName(poolId);
+ }
+
+ ui32 GetThreads(ui32 poolId) const {
+ return Executors ? Executors[poolId]->GetThreads() : CpuManager.GetThreads(poolId);
+ }
+ };
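+
+ // A minimal single-pool setup sketch. It assumes TBasicExecutorPool and
+ // TBasicSchedulerThread from executor_pool_basic.h and scheduler_basic.h;
+ // exact constructor signatures may differ, so treat this as an illustration:
+ //
+ //   auto setup = MakeHolder<TActorSystemSetup>();
+ //   setup->NodeId = 1;
+ //   setup->ExecutorsCount = 1;
+ //   setup->Executors.Reset(new TAutoPtr<IExecutorPool>[1]);
+ //   setup->Executors[0] = new TBasicExecutorPool(0, 4, 50); // poolId, threads, spin threshold (assumed signature)
+ //   setup->Scheduler = new TBasicSchedulerThread(TSchedulerConfig(512, 0)); // assumed type
+ //   TActorSystem actorSystem(setup);
+ //   actorSystem.Start();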
+
+ class TActorSystem : TNonCopyable {
+ struct TServiceMap;
+
+ public:
+ const ui32 NodeId;
+
+ private:
+ THolder<TCpuManager> CpuManager;
+ const ui32 ExecutorPoolCount;
+
+ TAutoPtr<ISchedulerThread> Scheduler;
+ THolder<TServiceMap> ServiceMap;
+
+ const ui32 InterconnectCount;
+ TArrayHolder<TActorId> Interconnect;
+
+ volatile ui64 CurrentTimestamp;
+ volatile ui64 CurrentMonotonic;
+ volatile ui64 CurrentIDCounter;
+
+ THolder<NSchedulerQueue::TQueueType> ScheduleQueue;
+ mutable TTicketLock ScheduleLock;
+
+ friend class TExecutorThread;
+
+ THolder<TActorSystemSetup> SystemSetup;
+ TActorId DefSelfID;
+ void* AppData0;
+ TIntrusivePtr<NLog::TSettings> LoggerSettings0;
+ TProxyWrapperFactory ProxyWrapperFactory;
+ TMutex ProxyCreationLock;
+
+ bool StartExecuted;
+ bool StopExecuted;
+ bool CleanupExecuted;
+
+ std::deque<std::function<void()>> DeferredPreStop;
+ public:
+ TActorSystem(THolder<TActorSystemSetup>& setup, void* appData = nullptr,
+ TIntrusivePtr<NLog::TSettings> loggerSettings = TIntrusivePtr<NLog::TSettings>(nullptr));
+ ~TActorSystem();
+
+ void Start();
+ void Stop();
+ void Cleanup();
+
+ TActorId Register(IActor* actor, TMailboxType::EType mailboxType = TMailboxType::HTSwap, ui32 executorPool = 0,
+ ui64 revolvingCounter = 0, const TActorId& parentId = TActorId());
+
+ bool Send(TAutoPtr<IEventHandle> ev) const;
+ bool Send(const TActorId& recipient, IEventBase* ev, ui32 flags = 0) const;
+
+ /**
+ * Schedule a one-shot event that will be sent at the given time point in the future.
+ *
+ * @param deadline the wallclock time point in the future when the event must be sent
+ * @param ev the event to send
+ * @param cookie cookie that will be piggybacked with event
+ */
+ void Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie = nullptr) const;
+
+ /**
+ * Schedule a one-shot event that will be sent at the given time point in the future.
+ *
+ * @param deadline the monotonic time point in the future when the event must be sent
+ * @param ev the event to send
+ * @param cookie cookie that will be piggybacked with event
+ */
+ void Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie = nullptr) const;
+
+ /**
+ * Schedule a one-shot event that will be sent after the given delay.
+ *
+ * @param delta the delay from now after which the event will be sent
+ * @param ev the event to send
+ * @param cookie cookie that will be piggybacked with event
+ */
+ void Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie = nullptr) const;
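+
+ // Usage sketch (hypothetical sender/recipient ids; TEvWakeup is a standard
+ // event from events.h, and the IEventHandle constructor arguments are assumed):
+ //
+ //   actorSystem.Schedule(TDuration::Seconds(5),
+ //       new IEventHandle(recipient, sender, new TEvents::TEvWakeup()));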
+
+ /**
+ * A way to interact with actors from non-actor context.
+ *
+ * This method will send the `event` to the `recipient` and then wait for a response. When the response
+ * arrives, it will be passed to the future. If the response is not of type `T`, the future will resolve
+ * into an exception.
+ *
+ * @tparam T expected response type. Must be derived from `IEventBase`,
+ * or use `IEventBase` to catch any response.
+ * @param actorSystem actor system that will be used to register an actor that'll wait for response.
+ * @param recipient who will get a request.
+ * @param event a request message.
+ * @return future that will be resolved when a message from `recipient` arrives.
+ */
+ template <typename T>
+ [[nodiscard]]
+ NThreading::TFuture<THolder<T>> Ask(TActorId recipient, THolder<IEventBase> event, TDuration timeout = TDuration::Max()) {
+ if constexpr (std::is_same_v<T, IEventBase>) {
+ return AskGeneric(Nothing(), recipient, std::move(event), timeout);
+ } else {
+ return AskGeneric(T::EventType, recipient, std::move(event), timeout)
+ .Apply([](const NThreading::TFuture<THolder<IEventBase>>& ev) {
+ return THolder<T>(static_cast<T*>(const_cast<THolder<IEventBase>&>(ev.GetValueSync()).Release())); // =(
+ });
+ }
+ }
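+
+ // Usage sketch from a non-actor context (see ask_ut.cpp; assumes the
+ // recipient answers TEvPing with TEvPong):
+ //
+ //   auto future = actorSystem->Ask<TEvents::TEvPong>(recipient, THolder<IEventBase>(new TEvents::TEvPing));
+ //   THolder<TEvents::TEvPong> pong = future.ExtractValueSync(); // throws on timeout or unexpected reply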
+
+ [[nodiscard]]
+ NThreading::TFuture<THolder<IEventBase>> AskGeneric(
+ TMaybe<ui32> expectedEventType,
+ TActorId recipient,
+ THolder<IEventBase> event,
+ TDuration timeout);
+
+ ui64 AllocateIDSpace(ui64 count);
+
+ TActorId InterconnectProxy(ui32 destinationNode) const;
+ ui32 BroadcastToProxies(const std::function<IEventHandle*(const TActorId&)>&);
+
+ void UpdateLinkStatus(ui8 status, ui32 destinationNode);
+ ui8 LinkStatus(ui32 destinationNode);
+
+ TActorId LookupLocalService(const TActorId& x) const;
+ TActorId RegisterLocalService(const TActorId& serviceId, const TActorId& actorId);
+
+ ui32 GetMaxActivityType() const {
+ return SystemSetup ? SystemSetup->MaxActivityType : 1;
+ }
+
+ TInstant Timestamp() const {
+ return TInstant::MicroSeconds(RelaxedLoad(&CurrentTimestamp));
+ }
+
+ TMonotonic Monotonic() const {
+ return TMonotonic::MicroSeconds(RelaxedLoad(&CurrentMonotonic));
+ }
+
+ template <typename T>
+ T* AppData() const {
+ return (T*)AppData0;
+ }
+
+ NLog::TSettings* LoggerSettings() const {
+ return LoggerSettings0.Get();
+ }
+
+ void GetPoolStats(ui32 poolId, TExecutorPoolStats& poolStats, TVector<TExecutorThreadStats>& statsCopy) const;
+
+ void DeferPreStop(std::function<void()> fn) {
+ DeferredPreStop.push_back(std::move(fn));
+ }
+
+ /* This is the base for memory profiling tags.
+ The system sets a memory profiling tag for the debug version of lfalloc.
+ The tag is set as "base_tag + actor_activity_type". */
+ static ui32 MemProfActivityBase;
+ };
+}
diff --git a/library/cpp/actors/core/actorsystem_ut.cpp b/library/cpp/actors/core/actorsystem_ut.cpp
new file mode 100644
index 0000000000..231d6f0ca1
--- /dev/null
+++ b/library/cpp/actors/core/actorsystem_ut.cpp
@@ -0,0 +1,45 @@
+#include "actorsystem.h"
+
+#include <library/cpp/actors/testlib/test_runtime.h>
+#include <library/cpp/testing/unittest/registar.h>
+
+using namespace NActors;
+
+Y_UNIT_TEST_SUITE(TActorSystemTest) {
+
+ class TTestActor: public TActor<TTestActor> {
+ public:
+ TTestActor()
+ : TActor{&TThis::Main}
+ {
+ }
+
+ STATEFN(Main) {
+ Y_UNUSED(ev);
+ }
+ };
+
+ THolder<TTestActorRuntimeBase> CreateRuntime() {
+ auto runtime = MakeHolder<TTestActorRuntimeBase>();
+ runtime->SetScheduledEventFilter([](auto&&, auto&&, auto&&, auto&&) { return false; });
+ runtime->Initialize();
+ return runtime;
+ }
+
+ Y_UNIT_TEST(LocalService) {
+ THolder<TTestActorRuntimeBase> runtime = CreateRuntime();
+ auto actorA = runtime->Register(new TTestActor);
+ auto actorB = runtime->Register(new TTestActor);
+
+ TActorId myServiceId{0, TStringBuf{"my-service"}};
+
+ auto prevActorId = runtime->RegisterService(myServiceId, actorA);
+ UNIT_ASSERT(!prevActorId);
+ UNIT_ASSERT_EQUAL(runtime->GetLocalServiceId(myServiceId), actorA);
+
+ prevActorId = runtime->RegisterService(myServiceId, actorB);
+ UNIT_ASSERT(prevActorId);
+ UNIT_ASSERT_EQUAL(prevActorId, actorA);
+ UNIT_ASSERT_EQUAL(runtime->GetLocalServiceId(myServiceId), actorB);
+ }
+}
diff --git a/library/cpp/actors/core/ask.cpp b/library/cpp/actors/core/ask.cpp
new file mode 100644
index 0000000000..0054c9a906
--- /dev/null
+++ b/library/cpp/actors/core/ask.cpp
@@ -0,0 +1,74 @@
+#include "ask.h"
+
+#include "actor_bootstrapped.h"
+#include "actorid.h"
+#include "event.h"
+#include "hfunc.h"
+
+namespace NActors {
+ namespace {
+ class TAskActor: public TActorBootstrapped<TAskActor> {
+ enum {
+ Timeout = EventSpaceBegin(TEvents::ES_PRIVATE),
+ };
+
+ // We can't use the standard timeout event because recipient may send us one.
+ struct TTimeout: public TEventLocal<TTimeout, Timeout> {
+ };
+
+ public:
+ TAskActor(
+ TMaybe<ui32> expectedEventType,
+ TActorId recipient,
+ THolder<IEventBase> event,
+ TDuration timeout,
+ const NThreading::TPromise<THolder<IEventBase>>& promise)
+ : ExpectedEventType_(expectedEventType)
+ , Recipient_(recipient)
+ , Event_(std::move(event))
+ , Timeout_(timeout)
+ , Promise_(promise)
+ {
+ }
+
+ public:
+ void Bootstrap() {
+ Send(Recipient_, std::move(Event_));
+ Become(&TAskActor::Waiting);
+
+ if (Timeout_ != TDuration::Max()) {
+ Schedule(Timeout_, new TTimeout);
+ }
+ }
+
+ STATEFN(Waiting) {
+ if (ev->GetTypeRewrite() == TTimeout::EventType) {
+ Promise_.SetException(std::make_exception_ptr(yexception() << "ask timeout"));
+ } else if (!ExpectedEventType_ || ev->GetTypeRewrite() == ExpectedEventType_) {
+ Promise_.SetValue(ev->ReleaseBase());
+ } else {
+ Promise_.SetException(std::make_exception_ptr(yexception() << "received unexpected response " << ev->GetBase()->ToString()));
+ }
+
+ PassAway();
+ }
+
+ public:
+ TMaybe<ui32> ExpectedEventType_;
+ TActorId Recipient_;
+ THolder<IEventBase> Event_;
+ TDuration Timeout_;
+ NThreading::TPromise<THolder<IEventBase>> Promise_;
+ };
+ }
+
+ THolder<IActor> MakeAskActor(
+ TMaybe<ui32> expectedEventType,
+ TActorId recipient,
+ THolder<IEventBase> event,
+ TDuration timeout,
+ const NThreading::TPromise<THolder<IEventBase>>& promise)
+ {
+ return MakeHolder<TAskActor>(expectedEventType, std::move(recipient), std::move(event), timeout, promise);
+ }
+}
diff --git a/library/cpp/actors/core/ask.h b/library/cpp/actors/core/ask.h
new file mode 100644
index 0000000000..036f1833a4
--- /dev/null
+++ b/library/cpp/actors/core/ask.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#include "actor.h"
+#include "event.h"
+
+#include <library/cpp/threading/future/future.h>
+
+namespace NActors {
+ /**
+ * See `TActorSystem::Ask`.
+ */
+ THolder<IActor> MakeAskActor(
+ TMaybe<ui32> expectedEventType,
+ TActorId recipient,
+ THolder<IEventBase> event,
+ TDuration timeout,
+ const NThreading::TPromise<THolder<IEventBase>>& promise);
+}
diff --git a/library/cpp/actors/core/ask_ut.cpp b/library/cpp/actors/core/ask_ut.cpp
new file mode 100644
index 0000000000..e72ebdba9b
--- /dev/null
+++ b/library/cpp/actors/core/ask_ut.cpp
@@ -0,0 +1,131 @@
+#include <library/cpp/testing/unittest/registar.h>
+
+#include "actorsystem.h"
+
+#include <library/cpp/actors/testlib/test_runtime.h>
+
+using namespace NActors;
+
+class TPingPong: public TActor<TPingPong> {
+public:
+ TPingPong()
+ : TActor(&TPingPong::Main)
+ {
+ }
+
+ STATEFN(Main) {
+ switch (ev->GetTypeRewrite()) {
+ hFunc(TEvents::TEvPing, OnPing);
+ hFunc(TEvents::TEvBlob, OnBlob);
+ }
+ }
+
+ void OnPing(const TEvents::TEvPing::TPtr& ev) {
+ Send(ev->Sender, new TEvents::TEvPong);
+ }
+
+ void OnBlob(const TEvents::TEvBlob::TPtr& ev) {
+ Send(ev->Sender, ev->Release().Release());
+ }
+};
+
+class TPing: public TActor<TPing> {
+public:
+ TPing()
+ : TActor(&TPing::Main)
+ {
+ }
+
+ STATEFN(Main) {
+ Y_UNUSED(ev);
+ }
+};
+
+THolder<TTestActorRuntimeBase> CreateRuntime() {
+ auto runtime = MakeHolder<TTestActorRuntimeBase>();
+ runtime->SetScheduledEventFilter([](auto&&, auto&&, auto&&, auto&&) { return false; });
+ runtime->Initialize();
+ return runtime;
+}
+
+Y_UNIT_TEST_SUITE(AskActor) {
+ Y_UNIT_TEST(Ok) {
+ auto runtime = CreateRuntime();
+ auto pingpong = runtime->Register(new TPingPong);
+
+ {
+ auto fut = runtime->GetAnyNodeActorSystem()->Ask<TEvents::TEvPong>(
+ pingpong,
+ THolder(new TEvents::TEvPing));
+ runtime->DispatchEvents();
+ fut.ExtractValueSync();
+ }
+
+ {
+ auto fut = runtime->GetAnyNodeActorSystem()->Ask<TEvents::TEvBlob>(
+ pingpong,
+ THolder(new TEvents::TEvBlob("hello!")));
+ runtime->DispatchEvents();
+ auto ev = fut.ExtractValueSync();
+ UNIT_ASSERT_VALUES_EQUAL(ev->Blob, "hello!");
+ }
+
+ {
+ auto fut = runtime->GetAnyNodeActorSystem()->Ask<IEventBase>(
+ pingpong,
+ THolder(new TEvents::TEvPing));
+ runtime->DispatchEvents();
+ auto ev = fut.ExtractValueSync();
+ UNIT_ASSERT_VALUES_EQUAL(ev->Type(), TEvents::TEvPong::EventType);
+ }
+ }
+
+ Y_UNIT_TEST(Err) {
+ auto runtime = CreateRuntime();
+ auto pingpong = runtime->Register(new TPingPong);
+
+ {
+ auto fut = runtime->GetAnyNodeActorSystem()->Ask<TEvents::TEvBlob>(
+ pingpong,
+ THolder(new TEvents::TEvPing));
+ runtime->DispatchEvents();
+ UNIT_ASSERT_EXCEPTION_CONTAINS(
+ fut.ExtractValueSync(),
+ yexception,
+ "received unexpected response HelloWorld: Pong");
+ }
+ }
+
+ Y_UNIT_TEST(Timeout) {
+ auto runtime = CreateRuntime();
+ auto ping = runtime->Register(new TPing);
+
+ {
+ auto fut = runtime->GetAnyNodeActorSystem()->Ask<TEvents::TEvPong>(
+ ping,
+ THolder(new TEvents::TEvPing),
+ TDuration::Seconds(1));
+ auto start = runtime->GetCurrentTime();
+ runtime->DispatchEvents({}, TDuration::Seconds(5));
+ UNIT_ASSERT_EXCEPTION_CONTAINS(
+ fut.ExtractValueSync(),
+ yexception,
+ "ask timeout");
+ UNIT_ASSERT_VALUES_EQUAL(runtime->GetCurrentTime() - start, TDuration::Seconds(1));
+ }
+
+ {
+ auto fut = runtime->GetAnyNodeActorSystem()->Ask<IEventBase>(
+ ping,
+ THolder(new TEvents::TEvPing),
+ TDuration::Seconds(1));
+ auto start = runtime->GetCurrentTime();
+ runtime->DispatchEvents({}, TDuration::Seconds(5));
+ UNIT_ASSERT_EXCEPTION_CONTAINS(
+ fut.ExtractValueSync(),
+ yexception,
+ "ask timeout");
+ UNIT_ASSERT_VALUES_EQUAL(runtime->GetCurrentTime() - start, TDuration::Seconds(1));
+ }
+ }
+}
diff --git a/library/cpp/actors/core/balancer.cpp b/library/cpp/actors/core/balancer.cpp
new file mode 100644
index 0000000000..cc5417b0b5
--- /dev/null
+++ b/library/cpp/actors/core/balancer.cpp
@@ -0,0 +1,293 @@
+#include "balancer.h"
+
+#include "probes.h"
+
+#include <library/cpp/actors/util/intrinsics.h>
+#include <library/cpp/actors/util/datetime.h>
+
+#include <util/system/spinlock.h>
+
+#include <algorithm>
+
+namespace NActors {
+ LWTRACE_USING(ACTORLIB_PROVIDER);
+
+ // Describes the balancing-related state of a pool; the most notable field is `Importance`, used to decide which pool gets an additional cpu
+ struct TLevel {
+ // Balancer will try to give more cpu to overloaded pools
+ enum ELoadClass {
+ Underloaded = 0,
+ Moderate = 1,
+ Overloaded = 2,
+ };
+
+ double ScaleFactor;
+ ELoadClass LoadClass;
+ ui64 Importance; // a pool with lower importance may pass a cpu to a pool with higher importance, but not vice versa
+
+ TLevel() {}
+
+ TLevel(const TBalancingConfig& cfg, TPoolId poolId, ui64 currentCpus, double cpuIdle) {
+ ScaleFactor = double(currentCpus) / cfg.Cpus;
+ if (cpuIdle > 1.3) { // TODO: add a better underload criterion, based on estimated latency w/o 1 cpu
+ LoadClass = Underloaded;
+ } else if (cpuIdle < 0.2) { // TODO: add a better overload criterion, based on latency
+ LoadClass = Overloaded;
+ } else {
+ LoadClass = Moderate;
+ }
+ Importance = MakeImportance(LoadClass, cfg.Priority, ScaleFactor, cpuIdle, poolId);
+ }
+
+ private:
+ // Importance is simple ui64 value (from highest to lowest):
+ // 2 Bits: LoadClass
+ // 8 Bits: Priority
+ // 10 Bits: -ScaleFactor (for max-min fairness with weights equal to TBalancingConfig::Cpus)
+ // 10 Bits: -CpuIdle
+ // 6 Bits: PoolId
+ static ui64 MakeImportance(ELoadClass load, ui8 priority, double scaleFactor, double cpuIdle, TPoolId poolId) {
+ ui64 idle = std::clamp<i64>(1024 - cpuIdle * 512, 0, 1023);
+ ui64 scale = std::clamp<i64>(1024 - scaleFactor * 32, 0, 1023);
+
+ Y_VERIFY(ui64(load) < (1ull << 2ull));
+ Y_VERIFY(ui64(priority) < (1ull << 8ull));
+ Y_VERIFY(ui64(scale) < (1ull << 10ull));
+ Y_VERIFY(ui64(idle) < (1ull << 10ull));
+ Y_VERIFY(ui64(poolId) < (1ull << 6ull));
+
+ static_assert(ui64(MaxPools) <= (1ull << 6ull));
+
+ ui64 importance =
+ (ui64(load) << ui64(6 + 10 + 10 + 8)) |
+ (ui64(priority) << ui64(6 + 10 + 10)) |
+ (ui64(scale) << ui64(6 + 10)) |
+ (ui64(idle) << ui64(6)) |
+ ui64(poolId);
+ return importance;
+ }
+ };
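+
+ // Worked example (illustrative): an overloaded pool (load=2) with priority=1,
+ // scaleFactor=1.0 (scale=992), cpuIdle=0.1 (idle=972) and poolId=3 packs into
+ //   (2 << 34) | (1 << 26) | (992 << 16) | (972 << 6) | 3,
+ // so the load class dominates, then priority, then the fairness terms.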
+
+ // Main balancer implementation
+ class TBalancer: public IBalancer {
+ private:
+ struct TCpu;
+ struct TPool;
+
+ bool Disabled = true;
+ TSpinLock Lock;
+ ui64 NextBalanceTs;
+ TVector<TCpu> Cpus; // Indexed by CpuId, can have gaps
+ TVector<TPool> Pools; // Indexed by PoolId, can have gaps
+ TBalancerConfig Config;
+
+ public:
+ // Setup
+ TBalancer(const TBalancerConfig& config, const TVector<TUnitedExecutorPoolConfig>& unitedPools, ui64 ts);
+ bool AddCpu(const TCpuAllocation& cpuAlloc, TCpuState* cpu) override;
+ ~TBalancer();
+
+ // Balancing
+ bool TryLock(ui64 ts) override;
+ void SetPoolStats(TPoolId pool, const TBalancerStats& stats) override;
+ void Balance() override;
+ void Unlock() override;
+
+ private:
+ void MoveCpu(TPool& from, TPool& to);
+ };
+
+ struct TBalancer::TPool {
+ TBalancingConfig Config;
+ TPoolId PoolId;
+ TString PoolName;
+
+ // Input data for balancing
+ TBalancerStats Prev;
+ TBalancerStats Next;
+
+ // Derived stats
+ double CpuLoad;
+ double CpuIdle;
+
+ // Classification
+ // NOTE: We want to avoid passing cpu back and forth, so we must consider not only current level,
+ // NOTE: but expected levels after movements also
+ TLevel CurLevel; // Level with current amount of cpu
+ TLevel AddLevel; // Level after accepting one cpu
+ TLevel SubLevel; // Level after donating one cpu
+
+ // Balancing state
+ ui64 CurrentCpus = 0; // Total number of cpus assigned to this pool (zero means the pool is not balanced)
+ ui64 PrevCpus = 0; // Cpus in last period
+
+ explicit TPool(const TBalancingConfig& cfg = {})
+ : Config(cfg)
+ {}
+
+ void Configure(const TBalancingConfig& cfg, const TString& poolName) {
+ Config = cfg;
+ // Enforce constraints
+ Config.MinCpus = std::clamp<ui32>(Config.MinCpus, 1, Config.Cpus);
+ Config.MaxCpus = Max<ui32>(Config.MaxCpus, Config.Cpus);
+ PoolName = poolName;
+ }
+ };
+
+ struct TBalancer::TCpu {
+ TCpuState* State = nullptr; // Cpu state, nullptr means cpu is not used (gap)
+ TCpuAllocation Alloc;
+ TPoolId Current;
+ TPoolId Assigned;
+ };
+
+ TBalancer::TBalancer(const TBalancerConfig& config, const TVector<TUnitedExecutorPoolConfig>& unitedPools, ui64 ts)
+ : NextBalanceTs(ts)
+ , Config(config)
+ {
+ for (TPoolId pool = 0; pool < MaxPools; pool++) {
+ Pools.emplace_back();
+ Pools.back().PoolId = pool;
+ }
+ for (const TUnitedExecutorPoolConfig& united : unitedPools) {
+ Pools[united.PoolId].Configure(united.Balancing, united.PoolName);
+ }
+ }
+
+ TBalancer::~TBalancer() {
+ }
+
+ bool TBalancer::AddCpu(const TCpuAllocation& cpuAlloc, TCpuState* state) {
+ // Setup
+ TCpuId cpuId = cpuAlloc.CpuId;
+ if (Cpus.size() <= cpuId) {
+ Cpus.resize(cpuId + 1);
+ }
+ TCpu& cpu = Cpus[cpuId];
+ cpu.State = state;
+ cpu.Alloc = cpuAlloc;
+
+ // Fill every pool with cpus up to TBalancingConfig::Cpus
+ TPoolId pool = 0;
+ for (TPool& p : Pools) {
+ if (p.CurrentCpus < p.Config.Cpus) {
+ p.CurrentCpus++;
+ break;
+ }
+ pool++;
+ }
+ if (pool != MaxPools) { // cpu under balancer control
+ state->SwitchPool(pool);
+ state->AssignPool(pool);
+ Disabled = false;
+ return true;
+ }
+ return false; // non-balanced cpu
+ }
+
+ bool TBalancer::TryLock(ui64 ts) {
+ if (!Disabled && NextBalanceTs < ts && Lock.TryAcquire()) {
+ NextBalanceTs = ts + Us2Ts(Config.PeriodUs);
+ return true;
+ }
+ return false;
+ }
+
+ void TBalancer::SetPoolStats(TPoolId pool, const TBalancerStats& stats) {
+ Y_VERIFY(pool < MaxPools);
+ TPool& p = Pools[pool];
+ p.Prev = p.Next;
+ p.Next = stats;
+ }
+
+ void TBalancer::Balance() {
+ // Update every cpu state
+ for (TCpu& cpu : Cpus) {
+ if (cpu.State) {
+ cpu.State->Load(cpu.Assigned, cpu.Current);
+ if (cpu.Current < MaxPools && cpu.Current != cpu.Assigned) {
+ return; // previous movement has not been applied yet, wait
+ }
+ }
+ }
+
+ // Process stats, classify and compute pool importance
+ TStackVec<TPool*, MaxPools> order;
+ for (TPool& pool : Pools) {
+ if (pool.Config.Cpus == 0) {
+ continue; // skip gaps (non-existent or non-united pools)
+ }
+ if (pool.Prev.Ts == 0 || pool.Prev.Ts >= pool.Next.Ts) {
+ return; // invalid stats
+ }
+
+ // Compute derived stats
+ pool.CpuLoad = (pool.Next.CpuUs - pool.Prev.CpuUs) / Ts2Us(pool.Next.Ts - pool.Prev.Ts);
+ if (pool.Prev.IdleUs == ui64(-1) || pool.Next.IdleUs == ui64(-1)) {
+ pool.CpuIdle = pool.CurrentCpus - pool.CpuLoad; // for tests
+ } else {
+ pool.CpuIdle = (pool.Next.IdleUs - pool.Prev.IdleUs) / Ts2Us(pool.Next.Ts - pool.Prev.Ts);
+ }
+
+ // Compute levels
+ pool.CurLevel = TLevel(pool.Config, pool.PoolId, pool.CurrentCpus, pool.CpuIdle);
+ pool.AddLevel = TLevel(pool.Config, pool.PoolId, pool.CurrentCpus + 1, pool.CpuIdle); // we expect the acquired cpu to become utilized
+ pool.SubLevel = TLevel(pool.Config, pool.PoolId, pool.CurrentCpus - 1, pool.CpuIdle - 1);
+
+ // Prepare for balancing
+ pool.PrevCpus = pool.CurrentCpus;
+ order.push_back(&pool);
+ }
+
+ // Sort pools by importance
+ std::sort(order.begin(), order.end(), [] (TPool* l, TPool* r) {return l->CurLevel.Importance < r->CurLevel.Importance; });
+ for (TPool* pool : order) {
+ LWPROBE(PoolStats, pool->PoolId, pool->PoolName, pool->CurrentCpus, pool->CurLevel.LoadClass, pool->Config.Priority, pool->CurLevel.ScaleFactor, pool->CpuIdle, pool->CpuLoad, pool->CurLevel.Importance, pool->AddLevel.Importance, pool->SubLevel.Importance);
+ }
+
+ // Move cpus from lower importance to higher importance pools
+ for (auto toIter = order.rbegin(); toIter != order.rend(); ++toIter) {
+ TPool& to = **toIter;
+ if (to.CurLevel.LoadClass == TLevel::Overloaded && // if pool is overloaded
+ to.CurrentCpus < to.Config.MaxCpus) // and constraints would not be violated
+ {
+ for (auto fromIter = order.begin(); (*fromIter)->CurLevel.Importance < to.CurLevel.Importance; ++fromIter) {
+ TPool& from = **fromIter;
+ if (from.CurrentCpus == from.PrevCpus && // if not balanced yet
+ from.CurrentCpus > from.Config.MinCpus && // and constraints would not be violated
+ from.SubLevel.Importance < to.AddLevel.Importance) // and the relative importance of the two pools would not flip after the cpu movement
+ {
+ MoveCpu(from, to);
+ from.CurrentCpus--;
+ to.CurrentCpus++;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ void TBalancer::MoveCpu(TBalancer::TPool& from, TBalancer::TPool& to) {
+ for (auto ci = Cpus.rbegin(), ce = Cpus.rend(); ci != ce; ci++) {
+ TCpu& cpu = *ci;
+ if (!cpu.State) {
+ continue;
+ }
+ if (cpu.Assigned == from.PoolId) {
+ cpu.State->AssignPool(to.PoolId);
+ cpu.Assigned = to.PoolId;
+ LWPROBE(MoveCpu, from.PoolId, to.PoolId, from.PoolName, to.PoolName, cpu.Alloc.CpuId);
+ return;
+ }
+ }
+ Y_FAIL();
+ }
+
+ void TBalancer::Unlock() {
+ Lock.Release();
+ }
+
+ IBalancer* MakeBalancer(const TBalancerConfig& config, const TVector<TUnitedExecutorPoolConfig>& unitedPools, ui64 ts) {
+ return new TBalancer(config, unitedPools, ts);
+ }
+}
diff --git a/library/cpp/actors/core/balancer.h b/library/cpp/actors/core/balancer.h
new file mode 100644
index 0000000000..9763ec79e1
--- /dev/null
+++ b/library/cpp/actors/core/balancer.h
@@ -0,0 +1,27 @@
+#pragma once
+
+#include "defs.h"
+#include "config.h"
+#include "cpu_state.h"
+
+namespace NActors {
+ // Per-pool statistics used by balancer
+ struct TBalancerStats {
+ ui64 Ts = 0; // Measurement timestamp
+ ui64 CpuUs = 0; // Total cpu microseconds consumed by pool on all cpus since start
+ ui64 IdleUs = ui64(-1); // Total cpu microseconds in spinning or waiting on futex
+ };
+
+ // Pool cpu balancer
+ struct IBalancer {
+ virtual ~IBalancer() {}
+ virtual bool AddCpu(const TCpuAllocation& cpuAlloc, TCpuState* cpu) = 0;
+ virtual bool TryLock(ui64 ts) = 0;
+ virtual void SetPoolStats(TPoolId pool, const TBalancerStats& stats) = 0;
+ virtual void Balance() = 0;
+ virtual void Unlock() = 0;
+ // TODO: add a method for on-the-fly reconfiguration
+ };
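+
+ // Expected locking protocol (sketch, mirrors balancer_ut.cpp; `stats[pool]`
+ // stands for per-pool measurements collected by the caller):
+ //
+ //   if (balancer->TryLock(ts)) {
+ //       for (TPoolId pool : unitedPools) balancer->SetPoolStats(pool, stats[pool]);
+ //       balancer->Balance();
+ //       balancer->Unlock();
+ //   }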
+
+ IBalancer* MakeBalancer(const TBalancerConfig& config, const TVector<TUnitedExecutorPoolConfig>& unitedPools, ui64 ts);
+}
diff --git a/library/cpp/actors/core/balancer_ut.cpp b/library/cpp/actors/core/balancer_ut.cpp
new file mode 100644
index 0000000000..7e5e95f4b9
--- /dev/null
+++ b/library/cpp/actors/core/balancer_ut.cpp
@@ -0,0 +1,225 @@
+#include "balancer.h"
+
+#include <library/cpp/actors/util/datetime.h>
+#include <library/cpp/lwtrace/all.h>
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <util/stream/str.h>
+
+using namespace NActors;
+
+////////////////////////////////////////////////////////////////////////////////
+
+Y_UNIT_TEST_SUITE(PoolCpuBalancer) {
+ struct TTest {
+ TCpuManagerConfig Config;
+ TCpuMask Available;
+ THolder<IBalancer> Balancer;
+ TVector<TCpuState> CpuStates;
+ TVector<ui64> CpuUs;
+ ui64 Now = 0;
+
+ void SetCpuCount(size_t count) {
+ Config.UnitedWorkers.CpuCount = count;
+ for (TCpuId cpuId = 0; cpuId < count; cpuId++) {
+ Available.Set(cpuId);
+ }
+ }
+
+ void AddPool(ui32 minCpus, ui32 cpus, ui32 maxCpus, ui8 priority = 0) {
+ TUnitedExecutorPoolConfig u;
+ u.PoolId = TPoolId(Config.United.size());
+ u.Balancing.Cpus = cpus;
+ u.Balancing.MinCpus = minCpus;
+ u.Balancing.MaxCpus = maxCpus;
+ u.Balancing.Priority = priority;
+ Config.United.push_back(u);
+ }
+
+ void Start() {
+ TCpuAllocationConfig allocation(Available, Config);
+ Balancer.Reset(MakeBalancer(Config.UnitedWorkers.Balancer, Config.United, 0));
+ CpuStates.resize(allocation.Items.size()); // do not resize it later to avoid dangling pointers
+ CpuUs.resize(CpuStates.size());
+ for (const TCpuAllocation& cpuAlloc : allocation.Items) {
+ bool added = Balancer->AddCpu(cpuAlloc, &CpuStates[cpuAlloc.CpuId]);
+ UNIT_ASSERT(added);
+ }
+ }
+
+ void Balance(ui64 deltaTs, const TVector<ui64>& cpuUs) {
+ Now += deltaTs;
+ ui64 ts = Now;
+ if (Balancer->TryLock(ts)) {
+ for (TPoolId pool = 0; pool < cpuUs.size(); pool++) {
+ CpuUs[pool] += cpuUs[pool];
+ TBalancerStats stats;
+ stats.Ts = ts;
+ stats.CpuUs = CpuUs[pool];
+ Balancer->SetPoolStats(pool, stats);
+ }
+ Balancer->Balance();
+ Balancer->Unlock();
+ }
+ }
+
+ void ApplyMovements() {
+ for (TCpuState& state : CpuStates) {
+ TPoolId current;
+ TPoolId assigned;
+ state.Load(assigned, current);
+ state.SwitchPool(assigned);
+ }
+ }
+
+ static TString ToStr(const TVector<ui64>& values) {
+ TStringStream ss;
+ ss << "{";
+ for (auto v : values) {
+ ss << " " << v;
+ }
+ ss << " }";
+ return ss.Str();
+ }
+
+ void AssertPoolsCurrentCpus(const TVector<ui64>& cpuRequired) {
+ TVector<ui64> cpuCurrent;
+ cpuCurrent.resize(cpuRequired.size());
+ for (TCpuState& state : CpuStates) {
+ TPoolId current;
+ TPoolId assigned;
+ state.Load(assigned, current);
+ cpuCurrent[current]++;
+ }
+ for (TPoolId pool = 0; pool < cpuRequired.size(); pool++) {
+ UNIT_ASSERT_C(cpuCurrent[pool] == cpuRequired[pool],
+ "cpu distribution mismatch, required " << ToStr(cpuRequired) << " but got " << ToStr(cpuCurrent));
+ }
+ }
+ };
+
+ Y_UNIT_TEST(StartLwtrace) {
+ NLWTrace::StartLwtraceFromEnv();
+ }
+
+ Y_UNIT_TEST(AllOverloaded) {
+ TTest t;
+ int cpus = 10;
+ t.SetCpuCount(cpus);
+ t.AddPool(1, 1, 10); // pool=0
+ t.AddPool(1, 2, 10); // pool=1
+ t.AddPool(1, 3, 10); // pool=2
+ t.AddPool(1, 4, 10); // pool=3
+ t.Start();
+ ui64 dts = 1.01 * Us2Ts(t.Config.UnitedWorkers.Balancer.PeriodUs);
+ ui64 totalCpuUs = cpus * Ts2Us(dts); // pretend every pool has consumed as whole actorsystem, overload
+ for (int i = 0; i < cpus; i++) {
+ t.Balance(dts, {totalCpuUs, totalCpuUs, totalCpuUs, totalCpuUs});
+ t.ApplyMovements();
+ }
+ t.AssertPoolsCurrentCpus({1, 2, 3, 4});
+ }
+
+ Y_UNIT_TEST(OneOverloaded) {
+ TTest t;
+ int cpus = 10;
+ t.SetCpuCount(cpus);
+ t.AddPool(1, 1, 10); // pool=0
+ t.AddPool(1, 2, 10); // pool=1
+ t.AddPool(1, 3, 10); // pool=2
+ t.AddPool(1, 4, 10); // pool=3
+ t.Start();
+ ui64 dts = 1.01 * Us2Ts(t.Config.UnitedWorkers.Balancer.PeriodUs);
+ ui64 totalCpuUs = cpus * Ts2Us(dts);
+ for (int i = 0; i < cpus; i++) {
+ t.Balance(dts, {totalCpuUs, 0, 0, 0});
+ t.ApplyMovements();
+ }
+ t.AssertPoolsCurrentCpus({7, 1, 1, 1});
+ for (int i = 0; i < cpus; i++) {
+ t.Balance(dts, {0, totalCpuUs, 0, 0});
+ t.ApplyMovements();
+ }
+ t.AssertPoolsCurrentCpus({1, 7, 1, 1});
+ for (int i = 0; i < cpus; i++) {
+ t.Balance(dts, {0, 0, totalCpuUs, 0});
+ t.ApplyMovements();
+ }
+ t.AssertPoolsCurrentCpus({1, 1, 7, 1});
+ for (int i = 0; i < cpus; i++) {
+ t.Balance(dts, {0, 0, 0, totalCpuUs});
+ t.ApplyMovements();
+ }
+ t.AssertPoolsCurrentCpus({1, 1, 1, 7});
+ }
+
+ Y_UNIT_TEST(TwoOverloadedFairness) {
+ TTest t;
+ int cpus = 10;
+ t.SetCpuCount(cpus);
+ t.AddPool(1, 1, 10); // pool=0
+ t.AddPool(1, 2, 10); // pool=1
+ t.AddPool(1, 3, 10); // pool=2
+ t.AddPool(1, 4, 10); // pool=3
+ t.Start();
+ ui64 dts = 1.01 * Us2Ts(t.Config.UnitedWorkers.Balancer.PeriodUs);
+ ui64 totalCpuUs = cpus * Ts2Us(dts);
+ for (int i = 0; i < cpus; i++) {
+ t.Balance(dts, {totalCpuUs, totalCpuUs, 0, 0});
+ t.ApplyMovements();
+ }
+ t.AssertPoolsCurrentCpus({3, 5, 1, 1});
+ for (int i = 0; i < cpus; i++) {
+ t.Balance(dts, {totalCpuUs, 0, totalCpuUs, 0});
+ t.ApplyMovements();
+ }
+ t.AssertPoolsCurrentCpus({2, 1, 6, 1});
+ for (int i = 0; i < cpus; i++) {
+ t.Balance(dts, {totalCpuUs, 0, 0, totalCpuUs});
+ t.ApplyMovements();
+ }
+ t.AssertPoolsCurrentCpus({2, 1, 1, 6});
+ for (int i = 0; i < cpus; i++) {
+ t.Balance(dts, {0, totalCpuUs, totalCpuUs, 0});
+ t.ApplyMovements();
+ }
+ t.AssertPoolsCurrentCpus({1, 3, 5, 1});
+ for (int i = 0; i < cpus; i++) {
+ t.Balance(dts, {0, totalCpuUs, 0, totalCpuUs});
+ t.ApplyMovements();
+ }
+ t.AssertPoolsCurrentCpus({1, 3, 1, 5});
+ for (int i = 0; i < cpus; i++) {
+ t.Balance(dts, {0, 0, totalCpuUs, totalCpuUs});
+ t.ApplyMovements();
+ }
+ t.AssertPoolsCurrentCpus({1, 1, 3, 5});
+ }
+
+ Y_UNIT_TEST(TwoOverloadedPriority) {
+ TTest t;
+ int cpus = 20;
+ t.SetCpuCount(cpus);
+ t.AddPool(1, 5, 20, 0); // pool=0
+ t.AddPool(1, 5, 20, 1); // pool=1
+ t.AddPool(1, 5, 20, 2); // pool=2
+ t.AddPool(1, 5, 20, 3); // pool=3
+ t.Start();
+ ui64 dts = 1.01 * Us2Ts(t.Config.UnitedWorkers.Balancer.PeriodUs);
+ ui64 mErlang = Ts2Us(dts) / 1000;
+ for (int i = 0; i < cpus; i++) {
+ t.Balance(dts, {20000 * mErlang, 2500 * mErlang, 4500 * mErlang, 9500 * mErlang});
+ t.ApplyMovements();
+ }
+ t.AssertPoolsCurrentCpus({2, 3, 5, 10});
+ t.Balance(dts, {20000 * mErlang, 2500 * mErlang, 4500 * mErlang, 8500 * mErlang});
+ t.ApplyMovements();
+ t.AssertPoolsCurrentCpus({3, 3, 5, 9});
+ // NOTE: this operation requires one move, but we do not perform global analysis, so multiple steps (1->2 & 0->1) are needed (can be optimized later)
+ for (int i = 0; i < 3; i++) {
+ t.Balance(dts, {20000 * mErlang, 2500 * mErlang, 5500 * mErlang, 8500 * mErlang});
+ t.ApplyMovements();
+ }
+ t.AssertPoolsCurrentCpus({2, 3, 6, 9});
+ }
+}
diff --git a/library/cpp/actors/core/buffer.cpp b/library/cpp/actors/core/buffer.cpp
new file mode 100644
index 0000000000..48128d76ef
--- /dev/null
+++ b/library/cpp/actors/core/buffer.cpp
@@ -0,0 +1,93 @@
+#include "buffer.h"
+
+#include <util/system/yassert.h>
+
+#include <algorithm>
+
+TBufferBase::TBufferBase(size_t size) noexcept
+ : Size(size)
+{
+}
+
+size_t
+TBufferBase::GetSize() const noexcept {
+ return Size;
+}
+
+void TBufferBase::SetSize(size_t size) noexcept {
+ Size = size;
+}
+
+/////////////////////////////////////////////////////////////////////
+
+template <typename PointerType>
+TBufferBaseT<PointerType>::TBufferBaseT(PointerType data, size_t size) noexcept
+ : TBufferBase(size)
+ , Data(data)
+{
+}
+
+template <typename PointerType>
+PointerType
+TBufferBaseT<PointerType>::GetPointer() const noexcept {
+ return Data;
+}
+
+template <typename PointerType>
+void TBufferBaseT<PointerType>::Assign(PointerType data, size_t size) noexcept {
+ Data = data;
+ Size = size;
+}
+
+template <>
+void TBufferBaseT<void*>::Cut(size_t offset) noexcept {
+ Y_VERIFY_DEBUG(offset <= Size);
+ Data = static_cast<char*>(Data) + offset;
+ TBufferBase::Size -= offset;
+}
+
+template <>
+void TBufferBaseT<const void*>::Cut(size_t offset) noexcept {
+ Y_VERIFY_DEBUG(offset <= Size);
+ Data = static_cast<const char*>(Data) + offset;
+ TBufferBase::Size -= offset;
+}
+
+template class TBufferBaseT<void*>;
+template class TBufferBaseT<const void*>;
+
+/////////////////////////////////////////////////////////////////////
+
+TConstBuffer::TConstBuffer(const void* data, size_t size) noexcept
+ : TBufferBaseT<const void*>(data, size)
+{
+}
+
+TConstBuffer::TConstBuffer(const TMutableBuffer& buffer) noexcept
+ : TBufferBaseT<const void*>(buffer.GetPointer(), buffer.GetSize())
+{
+}
+
+TConstBuffer
+TConstBuffer::Offset(ptrdiff_t offset, size_t size) const noexcept {
+ return TConstBuffer(static_cast<const char*>(Data) + offset, std::min(Size - offset, size));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TMutableBuffer::TMutableBuffer(void* data, size_t size) noexcept
+ : TBufferBaseT<void*>(data, size)
+{
+}
+
+TMutableBuffer
+TMutableBuffer::Offset(ptrdiff_t offset, size_t size) const noexcept {
+ return TMutableBuffer(static_cast<char*>(Data) + offset, std::min(Size - offset, size));
+}
+
+size_t
+TMutableBuffer::CopyFrom(const TConstBuffer& buffer) const noexcept {
+ const auto size = std::min(Size, buffer.Size);
+ std::memcpy(Data, buffer.Data, size);
+ return size;
+}
diff --git a/library/cpp/actors/core/buffer.h b/library/cpp/actors/core/buffer.h
new file mode 100644
index 0000000000..95425046d6
--- /dev/null
+++ b/library/cpp/actors/core/buffer.h
@@ -0,0 +1,62 @@
+#pragma once
+
+#include <limits>
+
+class TConstBuffer;
+class TMutableBuffer;
+
+class TBufferBase {
+public:
+ size_t GetSize() const noexcept;
+
+ void SetSize(size_t newSize) noexcept;
+
+protected:
+ TBufferBase(size_t size = 0) noexcept;
+
+ size_t Size;
+};
+
+template <typename PointerType>
+class TBufferBaseT: public TBufferBase {
+public:
+ PointerType GetPointer() const noexcept;
+
+ void Cut(size_t offset) noexcept;
+
+ void Assign(PointerType data = nullptr, size_t size = 0U) noexcept;
+
+protected:
+ TBufferBaseT(PointerType data, size_t size) noexcept;
+
+ PointerType Data;
+};
+
+/// Represents a constant memory buffer but does not own it.
+class TConstBuffer: public TBufferBaseT<const void*> {
+ friend class TMutableBuffer;
+
+public:
+ TConstBuffer(const TMutableBuffer& buffer) noexcept;
+
+ TConstBuffer(const void* data = nullptr, size_t size = 0U) noexcept;
+
+ TConstBuffer Offset(ptrdiff_t offset, size_t size = std::numeric_limits<size_t>::max()) const noexcept;
+};
+
+/// Represents a mutable memory buffer but does not own it.
+class TMutableBuffer: public TBufferBaseT<void*> {
+ friend class TConstBuffer;
+
+public:
+ TMutableBuffer(void* data = nullptr, size_t size = 0U) noexcept;
+
+ TMutableBuffer(const TMutableBuffer& value) noexcept
+ : TBufferBaseT<void*>(value)
+ {
+ }
+
+ TMutableBuffer Offset(ptrdiff_t offset, size_t size = std::numeric_limits<size_t>::max()) const noexcept;
+
+ size_t CopyFrom(const TConstBuffer& buffer) const noexcept;
+};
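+
+// Usage sketch (buffers are views and do not own memory):
+//
+//   char src[5] = {1, 2, 3, 4, 5};
+//   char dst[5] = {};
+//   TConstBuffer in(src, sizeof(src));
+//   TMutableBuffer out(dst, sizeof(dst));
+//   size_t copied = out.CopyFrom(in); // copied == 5
+//   TConstBuffer tail = in.Offset(2); // views {3, 4, 5}, size 3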
diff --git a/library/cpp/actors/core/callstack.cpp b/library/cpp/actors/core/callstack.cpp
new file mode 100644
index 0000000000..9297c1a079
--- /dev/null
+++ b/library/cpp/actors/core/callstack.cpp
@@ -0,0 +1,93 @@
+#include "callstack.h"
+#include <util/thread/singleton.h>
+
+#ifdef USE_ACTOR_CALLSTACK
+
+namespace NActors {
+ namespace {
+ void (*PreviousFormatBackTrace)(IOutputStream*) = 0;
+ ui32 ActorBackTraceEnableCounter = 0;
+ }
+
+ void ActorFormatBackTrace(IOutputStream* out) {
+ TStringStream str;
+ PreviousFormatBackTrace(&str);
+ str << Endl;
+ TCallstack::DumpCallstack(str);
+ *out << str.Str();
+ }
+
+ void EnableActorCallstack() {
+ if (ActorBackTraceEnableCounter == 0) {
+ Y_VERIFY(PreviousFormatBackTrace == 0);
+ PreviousFormatBackTrace = SetFormatBackTraceFn(ActorFormatBackTrace);
+ }
+
+ ++ActorBackTraceEnableCounter;
+ }
+
+ void DisableActorCallstack() {
+ --ActorBackTraceEnableCounter;
+
+ if (ActorBackTraceEnableCounter == 0) {
+ Y_VERIFY(PreviousFormatBackTrace);
+ SetFormatBackTraceFn(PreviousFormatBackTrace);
+ PreviousFormatBackTrace = 0;
+ }
+ }
+
+ TCallstack::TCallstack()
+ : BeginIdx(0)
+ , Size(0)
+ , LinesToSkip(0)
+ {
+ }
+
+ void TCallstack::SetLinesToSkip() {
+ TTrace record;
+ LinesToSkip = BackTrace(record.Data, TTrace::CAPACITY);
+ }
+
+ void TCallstack::Trace() {
+ size_t currentIdx = (BeginIdx + Size) % RECORDS;
+ if (Size == RECORDS) {
+ ++BeginIdx;
+ } else {
+ ++Size;
+ }
+ TTrace& record = Record[currentIdx];
+ record.Size = BackTrace(record.Data, TTrace::CAPACITY);
+ record.LinesToSkip = LinesToSkip;
+ }
+
+ void TCallstack::TraceIfEmpty() {
+ if (Size == 0) {
+ LinesToSkip = 0;
+ Trace();
+ }
+ }
+
+ TCallstack& TCallstack::GetTlsCallstack() {
+ return *FastTlsSingleton<TCallstack>();
+ }
+
+ void TCallstack::DumpCallstack(TStringStream& str) {
+ TCallstack& callstack = GetTlsCallstack();
+ for (int i = callstack.Size - 1; i >= 0; --i) {
+ TTrace& record = callstack.Record[(callstack.BeginIdx + i) % RECORDS];
+ str << Endl << "Trace entry " << i << Endl << Endl;
+ size_t size = record.Size;
+ if (size > record.LinesToSkip && size < TTrace::CAPACITY) {
+ size -= record.LinesToSkip;
+ }
+ if (size > RECORDS_TO_SKIP) {
+ FormatBackTrace(&str, &record.Data[RECORDS_TO_SKIP], size - RECORDS_TO_SKIP);
+ } else {
+ FormatBackTrace(&str, record.Data, size);
+ }
+ str << Endl;
+ }
+ }
+}
+
+#endif
diff --git a/library/cpp/actors/core/callstack.h b/library/cpp/actors/core/callstack.h
new file mode 100644
index 0000000000..176717d2ae
--- /dev/null
+++ b/library/cpp/actors/core/callstack.h
@@ -0,0 +1,58 @@
+#pragma once
+
+#ifndef NDEBUG
+//#define ENABLE_ACTOR_CALLSTACK
+#endif
+
+#ifdef ENABLE_ACTOR_CALLSTACK
+#include "defs.h"
+#include <util/system/backtrace.h>
+#include <util/stream/str.h>
+#include <util/generic/deque.h>
+#define USE_ACTOR_CALLSTACK
+
+namespace NActors {
+ struct TCallstack {
+ struct TTrace {
+ static const size_t CAPACITY = 50;
+ void* Data[CAPACITY];
+ size_t Size;
+ size_t LinesToSkip;
+
+ TTrace()
+ : Size(0)
+ , LinesToSkip(0)
+ {
+ }
+ };
+
+ static const size_t RECORDS = 8;
+ static const size_t RECORDS_TO_SKIP = 2;
+ TTrace Record[RECORDS];
+ size_t BeginIdx;
+ size_t Size;
+ size_t LinesToSkip;
+
+ TCallstack();
+ void SetLinesToSkip();
+ void Trace();
+ void TraceIfEmpty();
+ static TCallstack& GetTlsCallstack();
+ static void DumpCallstack(TStringStream& str);
+ };
+
+ void EnableActorCallstack();
+ void DisableActorCallstack();
+
+}
+
+#else
+
+namespace NActors {
+ inline void EnableActorCallstack() {}
+
+ inline void DisableActorCallstack() {}
+
+}
+
+#endif
diff --git a/library/cpp/actors/core/config.h b/library/cpp/actors/core/config.h
new file mode 100644
index 0000000000..2486bf4c43
--- /dev/null
+++ b/library/cpp/actors/core/config.h
@@ -0,0 +1,239 @@
+#pragma once
+
+#include "defs.h"
+#include <library/cpp/actors/util/cpumask.h>
+#include <library/cpp/monlib/dynamic_counters/counters.h>
+#include <util/datetime/base.h>
+#include <util/generic/ptr.h>
+#include <util/generic/string.h>
+#include <util/generic/vector.h>
+
+namespace NActors {
+
+ struct TBalancingConfig {
+ // Default cpu count (used during overload). A zero value disables balancing for this pool
+ // 1) Sum of `Cpus` on all pools cannot be changed without restart
+ // (changing cpu mode between Shared and Assigned is not implemented yet)
+ // 2) This sum must be equal to TUnitedWorkersConfig::CpuCount,
+ // otherwise `CpuCount - SUM(Cpus)` cpus will be in Shared mode (i.e. actorsystem 2.0)
+ ui32 Cpus = 0;
+
+ ui32 MinCpus = 0; // Lower balancing bound, should be at least 1, and not greater than `Cpus`
+ ui32 MaxCpus = 0; // Upper balancing bound, should not be lower than `Cpus`
+ ui8 Priority = 0; // Priority of pool to obtain cpu due to balancing (higher is better)
+ ui64 ToleratedLatencyUs = 0; // p100-latency threshold indicating that more cpus are required by pool
+ };
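+
+ // Example (illustrative): three united pools sharing TUnitedWorkersConfig::CpuCount = 10
+ // could set Cpus = {2, 3, 5} (the sum must equal CpuCount), each with MinCpus = 1 and
+ // MaxCpus = 10, letting the balancer shift cpus toward overloaded pools within bounds.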
+
+ struct TBalancerConfig {
+ ui64 PeriodUs = 15000000; // Time between balancer steps
+ };
+
+ struct TBasicExecutorPoolConfig {
+ static constexpr TDuration DEFAULT_TIME_PER_MAILBOX = TDuration::MilliSeconds(10);
+ static constexpr ui32 DEFAULT_EVENTS_PER_MAILBOX = 100;
+
+ ui32 PoolId = 0;
+ TString PoolName;
+ ui32 Threads = 1;
+ ui64 SpinThreshold = 100;
+ TCpuMask Affinity; // Executor thread affinity
+ TDuration TimePerMailbox = DEFAULT_TIME_PER_MAILBOX;
+ ui32 EventsPerMailbox = DEFAULT_EVENTS_PER_MAILBOX;
+ int RealtimePriority = 0;
+ ui32 MaxActivityType = 1;
+ };
+
+ struct TIOExecutorPoolConfig {
+ ui32 PoolId = 0;
+ TString PoolName;
+ ui32 Threads = 1;
+ TCpuMask Affinity; // Executor thread affinity
+ ui32 MaxActivityType = 1;
+ };
+
+ struct TUnitedExecutorPoolConfig {
+ static constexpr TDuration DEFAULT_TIME_PER_MAILBOX = TDuration::MilliSeconds(10);
+ static constexpr ui32 DEFAULT_EVENTS_PER_MAILBOX = 100;
+
+ ui32 PoolId = 0;
+ TString PoolName;
+
+ // Resource sharing
+ ui32 Concurrency = 0; // Limits the number of simultaneously running mailboxes if non-zero (do not set if Balancing.Cpus != 0)
+ TPoolWeight Weight = 0; // Weight in fair cpu-local pool scheduler
+ TCpuMask Allowed; // Allowed CPUs for workers to run this pool on (ignored if balancer works, i.e. actorsystem 1.5)
+
+ // Single mailbox execution limits
+ TDuration TimePerMailbox = DEFAULT_TIME_PER_MAILBOX;
+ ui32 EventsPerMailbox = DEFAULT_EVENTS_PER_MAILBOX;
+
+ // Introspection
+ ui32 MaxActivityType = 1;
+
+ // Long-term balancing
+ TBalancingConfig Balancing;
+ };
+
+ struct TUnitedWorkersConfig {
+ ui32 CpuCount = 0; // Total CPUs running united workers (i.e. TBasicExecutorPoolConfig::Threads analog); set to zero to disable united workers
+ ui64 SpinThresholdUs = 100; // Limit for active spinning in case all pools became idle
+ ui64 PoolLimitUs = 500; // Soft limit on pool execution
+ ui64 EventLimitUs = 100; // Hard limit on last event execution exceeding pool limit
+ ui64 LimitPrecisionUs = 100; // Maximum delay of timer on limit excess (delay needed to avoid settimer syscall on every pool switch)
+ ui64 FastWorkerPriority = 10; // Real-time priority of workers not exceeding hard limits
+ ui64 IdleWorkerPriority = 20; // Real-time priority of standby workers waiting for hard preemption on timers (should be greater than FastWorkerPriority)
+ TCpuMask Allowed; // Allowed CPUs for workers to run on (every worker has affinity for exactly one cpu)
+ bool NoRealtime = false; // For environments w/o permissions for RT-threads
+ bool NoAffinity = false; // For environments w/o permissions for cpu affinity
+ TBalancerConfig Balancer;
+ };
+
+ struct TCpuManagerConfig {
+ TUnitedWorkersConfig UnitedWorkers;
+ TVector<TBasicExecutorPoolConfig> Basic;
+ TVector<TIOExecutorPoolConfig> IO;
+ TVector<TUnitedExecutorPoolConfig> United;
+
+ ui32 GetExecutorsCount() const {
+ return Basic.size() + IO.size() + United.size();
+ }
+
+ TString GetPoolName(ui32 poolId) const {
+ for (const auto& p : Basic) {
+ if (p.PoolId == poolId) {
+ return p.PoolName;
+ }
+ }
+ for (const auto& p : IO) {
+ if (p.PoolId == poolId) {
+ return p.PoolName;
+ }
+ }
+ for (const auto& p : United) {
+ if (p.PoolId == poolId) {
+ return p.PoolName;
+ }
+ }
+ Y_FAIL("undefined pool id: %" PRIu32, (ui32)poolId);
+ }
+
+ ui32 GetThreads(ui32 poolId) const {
+ for (const auto& p : Basic) {
+ if (p.PoolId == poolId) {
+ return p.Threads;
+ }
+ }
+ for (const auto& p : IO) {
+ if (p.PoolId == poolId) {
+ return p.Threads;
+ }
+ }
+ for (const auto& p : United) {
+ if (p.PoolId == poolId) {
+ return p.Concurrency ? p.Concurrency : UnitedWorkers.CpuCount;
+ }
+ }
+ Y_FAIL("undefined pool id: %" PRIu32, (ui32)poolId);
+ }
+ };
+
+ struct TSchedulerConfig {
+ TSchedulerConfig(
+ ui64 resolution = 1024,
+ ui64 spinThreshold = 100,
+ ui64 progress = 10000,
+ bool useSchedulerActor = false)
+ : ResolutionMicroseconds(resolution)
+ , SpinThreshold(spinThreshold)
+ , ProgressThreshold(progress)
+ , UseSchedulerActor(useSchedulerActor)
+ {}
+
+ ui64 ResolutionMicroseconds = 1024;
+ ui64 SpinThreshold = 100;
+ ui64 ProgressThreshold = 10000;
+ bool UseSchedulerActor = false; // False is default because tests use scheduler thread
+ ui64 RelaxedSendPaceEventsPerSecond = 200000;
+ ui64 RelaxedSendPaceEventsPerCycle = RelaxedSendPaceEventsPerSecond * ResolutionMicroseconds / 1000000;
+ // For resolution >= 250000 microseconds threshold is SendPace
+ // For resolution <= 250 microseconds threshold is 20 * SendPace
+ ui64 RelaxedSendThresholdEventsPerSecond = RelaxedSendPaceEventsPerSecond *
+ (20 - ((20 - 1) * ClampVal(ResolutionMicroseconds, ui64(250), ui64(250000)) - 250) / (250000 - 250));
+ ui64 RelaxedSendThresholdEventsPerCycle = RelaxedSendThresholdEventsPerSecond * ResolutionMicroseconds / 1000000;
+
+ // Optional subsection for scheduler counters (usually subsystem=utils)
+ NMonitoring::TDynamicCounterPtr MonCounters = nullptr;
+ };
+
+ struct TCpuAllocation {
+ struct TPoolAllocation {
+ TPoolId PoolId;
+ TPoolWeight Weight;
+
+ TPoolAllocation(TPoolId poolId = 0, TPoolWeight weight = 0)
+ : PoolId(poolId)
+ , Weight(weight)
+ {}
+ };
+
+ TCpuId CpuId;
+ TVector<TPoolAllocation> AllowedPools;
+
+ TPoolsMask GetPoolsMask() const {
+ TPoolsMask mask = 0;
+ for (const auto& pa : AllowedPools) {
+ if (pa.PoolId < MaxPools) {
+ mask |= (1ull << pa.PoolId); // set the bit for each allowed pool
+ }
+ }
+ return mask;
+ }
+
+ bool HasPool(TPoolId pool) const {
+ for (const auto& pa : AllowedPools) {
+ if (pa.PoolId == pool) {
+ return true;
+ }
+ }
+ return false;
+ }
+ };
+
+ struct TCpuAllocationConfig {
+ TVector<TCpuAllocation> Items;
+
+ TCpuAllocationConfig(const TCpuMask& available, const TCpuManagerConfig& cfg) {
+ for (const TUnitedExecutorPoolConfig& pool : cfg.United) {
+ Y_VERIFY(pool.PoolId < MaxPools, "wrong PoolId of united executor pool: %s(%d)",
+ pool.PoolName.c_str(), (pool.PoolId));
+ }
+ ui32 allocated[MaxPools] = {0};
+ for (TCpuId cpu = 0; cpu < available.Size() && Items.size() < cfg.UnitedWorkers.CpuCount; cpu++) {
+ if (available.IsSet(cpu)) {
+ TCpuAllocation item;
+ item.CpuId = cpu;
+ for (const TUnitedExecutorPoolConfig& pool : cfg.United) {
+ if (cfg.UnitedWorkers.Allowed.IsEmpty() || cfg.UnitedWorkers.Allowed.IsSet(cpu)) {
+ if (pool.Allowed.IsEmpty() || pool.Allowed.IsSet(cpu)) {
+ item.AllowedPools.emplace_back(pool.PoolId, pool.Weight);
+ allocated[pool.PoolId]++;
+ }
+ }
+ }
+ if (!item.AllowedPools.empty()) {
+ Items.push_back(item);
+ }
+ }
+ }
+ for (const TUnitedExecutorPoolConfig& pool : cfg.United) {
+ Y_VERIFY(allocated[pool.PoolId] > 0, "unable to allocate cpu for united executor pool: %s(%d)",
+ pool.PoolName.c_str(), (pool.PoolId));
+ }
+ }
+
+ operator bool() const {
+ return !Items.empty();
+ }
+ };
+
+}
diff --git a/library/cpp/actors/core/cpu_manager.cpp b/library/cpp/actors/core/cpu_manager.cpp
new file mode 100644
index 0000000000..39089b5d83
--- /dev/null
+++ b/library/cpp/actors/core/cpu_manager.cpp
@@ -0,0 +1,108 @@
+#include "cpu_manager.h"
+#include "probes.h"
+
+namespace NActors {
+ LWTRACE_USING(ACTORLIB_PROVIDER);
+
+ void TCpuManager::Setup() {
+ TAffinity available;
+ available.Current();
+ TCpuAllocationConfig allocation(available, Config);
+
+ if (allocation) {
+ if (!Balancer) {
+ Balancer.Reset(MakeBalancer(Config.UnitedWorkers.Balancer, Config.United, GetCycleCountFast()));
+ }
+ UnitedWorkers.Reset(new TUnitedWorkers(Config.UnitedWorkers, Config.United, allocation, Balancer.Get()));
+ }
+
+ Executors.Reset(new TAutoPtr<IExecutorPool>[ExecutorPoolCount]);
+
+ for (ui32 excIdx = 0; excIdx != ExecutorPoolCount; ++excIdx) {
+ Executors[excIdx].Reset(CreateExecutorPool(excIdx));
+ }
+ }
+
+ void TCpuManager::PrepareStart(TVector<NSchedulerQueue::TReader*>& scheduleReaders, TActorSystem* actorSystem) {
+ if (UnitedWorkers) {
+ UnitedWorkers->Prepare(actorSystem, scheduleReaders);
+ }
+ for (ui32 excIdx = 0; excIdx != ExecutorPoolCount; ++excIdx) {
+ NSchedulerQueue::TReader* readers;
+ ui32 readersCount = 0;
+ Executors[excIdx]->Prepare(actorSystem, &readers, &readersCount);
+ for (ui32 i = 0; i != readersCount; ++i, ++readers) {
+ scheduleReaders.push_back(readers);
+ }
+ }
+ }
+
+ void TCpuManager::Start() {
+ if (UnitedWorkers) {
+ UnitedWorkers->Start();
+ }
+ for (ui32 excIdx = 0; excIdx != ExecutorPoolCount; ++excIdx) {
+ Executors[excIdx]->Start();
+ }
+ }
+
+ void TCpuManager::PrepareStop() {
+ for (ui32 excIdx = 0; excIdx != ExecutorPoolCount; ++excIdx) {
+ Executors[excIdx]->PrepareStop();
+ }
+ if (UnitedWorkers) {
+ UnitedWorkers->PrepareStop();
+ }
+ }
+
+ void TCpuManager::Shutdown() {
+ for (ui32 excIdx = 0; excIdx != ExecutorPoolCount; ++excIdx) {
+ Executors[excIdx]->Shutdown();
+ }
+ if (UnitedWorkers) {
+ UnitedWorkers->Shutdown();
+ }
+ for (ui32 round = 0, done = 0; done < ExecutorPoolCount && round < 3; ++round) {
+ done = 0;
+ for (ui32 excIdx = 0; excIdx != ExecutorPoolCount; ++excIdx) {
+ if (Executors[excIdx]->Cleanup()) {
+ ++done;
+ }
+ }
+ }
+ }
+
+ void TCpuManager::Cleanup() {
+ for (ui32 round = 0, done = 0; done < ExecutorPoolCount; ++round) {
+ Y_VERIFY(round < 10, "actorsystem cleanup could not be completed in 10 rounds");
+ done = 0;
+ for (ui32 excIdx = 0; excIdx != ExecutorPoolCount; ++excIdx) {
+ if (Executors[excIdx]->Cleanup()) {
+ ++done;
+ }
+ }
+ }
+ Executors.Destroy();
+ UnitedWorkers.Destroy();
+ }
+
+ IExecutorPool* TCpuManager::CreateExecutorPool(ui32 poolId) {
+ for (TBasicExecutorPoolConfig& cfg : Config.Basic) {
+ if (cfg.PoolId == poolId) {
+ return new TBasicExecutorPool(cfg);
+ }
+ }
+ for (TIOExecutorPoolConfig& cfg : Config.IO) {
+ if (cfg.PoolId == poolId) {
+ return new TIOExecutorPool(cfg);
+ }
+ }
+ for (TUnitedExecutorPoolConfig& cfg : Config.United) {
+ if (cfg.PoolId == poolId) {
+ IExecutorPool* result = new TUnitedExecutorPool(cfg, UnitedWorkers.Get());
+ return result;
+ }
+ }
+ Y_FAIL("missing PoolId: %d", int(poolId));
+ }
+}
diff --git a/library/cpp/actors/core/cpu_manager.h b/library/cpp/actors/core/cpu_manager.h
new file mode 100644
index 0000000000..454035477b
--- /dev/null
+++ b/library/cpp/actors/core/cpu_manager.h
@@ -0,0 +1,57 @@
+#pragma once
+
+#include "actorsystem.h"
+#include "executor_pool_basic.h"
+#include "executor_pool_io.h"
+#include "executor_pool_united.h"
+
+namespace NActors {
+ class TCpuManager : public TNonCopyable {
+ const ui32 ExecutorPoolCount;
+ TArrayHolder<TAutoPtr<IExecutorPool>> Executors;
+ THolder<TUnitedWorkers> UnitedWorkers;
+ THolder<IBalancer> Balancer;
+ TCpuManagerConfig Config;
+ public:
+ explicit TCpuManager(THolder<TActorSystemSetup>& setup)
+ : ExecutorPoolCount(setup->GetExecutorsCount())
+ , Balancer(setup->Balancer)
+ , Config(setup->CpuManager)
+ {
+ if (setup->Executors) { // Explicit mode w/o united pools
+ Executors.Reset(setup->Executors.Release());
+ for (ui32 excIdx = 0; excIdx != ExecutorPoolCount; ++excIdx) {
+ IExecutorPool* pool = Executors[excIdx].Get();
+ Y_VERIFY(dynamic_cast<TUnitedExecutorPool*>(pool) == nullptr,
+ "united executor pool is prohibited in explicit mode of NActors::TCpuManager");
+ }
+ } else {
+ Setup();
+ }
+ }
+
+ void Setup();
+ void PrepareStart(TVector<NSchedulerQueue::TReader*>& scheduleReaders, TActorSystem* actorSystem);
+ void Start();
+ void PrepareStop();
+ void Shutdown();
+ void Cleanup();
+
+ ui32 GetExecutorsCount() const {
+ return ExecutorPoolCount;
+ }
+
+ IExecutorPool* GetExecutorPool(ui32 poolId) {
+ return Executors[poolId].Get();
+ }
+
+ void GetPoolStats(ui32 poolId, TExecutorPoolStats& poolStats, TVector<TExecutorThreadStats>& statsCopy) const {
+ if (poolId < ExecutorPoolCount) {
+ Executors[poolId]->GetCurrentStats(poolStats, statsCopy);
+ }
+ }
+
+ private:
+ IExecutorPool* CreateExecutorPool(ui32 poolId);
+ };
+}
diff --git a/library/cpp/actors/core/cpu_state.h b/library/cpp/actors/core/cpu_state.h
new file mode 100644
index 0000000000..b8030149a7
--- /dev/null
+++ b/library/cpp/actors/core/cpu_state.h
@@ -0,0 +1,215 @@
+#pragma once
+
+#include "defs.h"
+
+#include <library/cpp/actors/util/futex.h>
+
+namespace NActors {
+
+ class alignas(64) TCpuState {
+ // Atomic cacheline-aligned 64-bit state, see description below
+ TAtomic State = 0;
+ char Padding[64 - sizeof(TAtomic)];
+
+ // Bits 0-31: Currently executing pool
+ // - value less than MaxPools means cpu is executing corresponding pool (fast-worker is executing or waiting for slow-workers)
+ // - one of Cpu* values in case of idle cpu
+ // - used as futex by blocked fast-worker
+ static constexpr ui64 CurrentBits = 32;
+ static constexpr ui64 CurrentMask = ui64((1ull << CurrentBits) - 1);
+
+ // Bits 32-63: Assigned pool
+ // - value is set by balancer
+ // - NOT used as futex
+ // - Not balanced
+ static constexpr ui64 AssignedOffs = 32;
+ static constexpr ui64 AssignedMask = ~CurrentMask;
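+
+ // Example (illustrative): assigned=2 and current=CpuBlocked (MaxPools + 1 = 64)
+ // packs into (2ull << 32) | 64; a blocked fast-worker futex-waits on the low
+ // 32 bits (current pool) until a wake-up or the balancer changes them.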
+
+ public:
+ TCpuState() {
+ Y_UNUSED(Padding);
+ }
+
+ void Load(TPoolId& assigned, TPoolId& current) const {
+ TAtomicBase state = AtomicLoad(&State);
+ assigned = (state & AssignedMask) >> AssignedOffs;
+ current = state & CurrentMask;
+ }
+
+ TPoolId CurrentPool() const {
+ return TPoolId(AtomicLoad(&State) & CurrentMask);
+ }
+
+ void SwitchPool(TPoolId pool) {
+ while (true) {
+ TAtomicBase state = AtomicLoad(&State);
+ if (AtomicCas(&State, (state & ~CurrentMask) | pool, state)) {
+ return;
+ }
+ }
+ }
+
+ TPoolId AssignedPool() const {
+ return TPoolId((AtomicLoad(&State) & AssignedMask) >> AssignedOffs);
+ }
+
+ // Assigns new pool to cpu and wakes it up if cpu is idle
+ void AssignPool(TPoolId pool) {
+ while (true) {
+ TAtomicBase state = AtomicLoad(&State);
+ TPoolId current(state & CurrentMask);
+ if (Y_UNLIKELY(current == CpuStopped)) {
+ return; // it would be better to shutdown instead of balancing
+ }
+ // Idle cpu must be woken up after balancing to handle pending tokens (if any) in assigned/schedulable pool(s)
+ if (current == CpuSpinning) {
+ if (AtomicCas(&State, (ui64(pool) << AssignedOffs) | pool, state)) {
+ return; // successfully woken up
+ }
+ } else if (current == CpuBlocked) {
+ if (AtomicCas(&State, (ui64(pool) << AssignedOffs) | pool, state)) {
+ FutexWake();
+ return; // successfully woken up
+ }
+ } else {
+ if (AtomicCas(&State, (ui64(pool) << AssignedOffs) | (state & ~AssignedMask), state)) {
+ return; // wakeup is not required
+ }
+ }
+ }
+ }
+
+ void Stop() {
+ while (true) {
+ TAtomicBase state = AtomicLoad(&State);
+ if (AtomicCas(&State, (state & ~CurrentMask) | CpuStopped, state)) {
+ FutexWake();
+ return; // successfully stopped
+ }
+ }
+ }
+
+ // Start waiting, returns false in case of actorsystem shutdown
+ bool StartSpinning() {
+ while (true) {
+ TAtomicBase state = AtomicLoad(&State);
+ TPoolId current(state & CurrentMask);
+ if (Y_UNLIKELY(current == CpuStopped)) {
+ return false;
+ }
+ Y_VERIFY_DEBUG(current < MaxPools, "unexpected already waiting state of cpu (%d)", (int)current);
+ if (AtomicCas(&State, (state & ~CurrentMask) | CpuSpinning, state)) { // successfully marked as spinning
+ return true;
+ }
+ }
+ }
+
+ bool StartBlocking() {
+ while (true) {
+ TAtomicBase state = AtomicLoad(&State);
+ TPoolId current(state & CurrentMask);
+ if (current == CpuSpinning) {
+ if (AtomicCas(&State, (state & ~CurrentMask) | CpuBlocked, state)) {
+ return false; // successful switch
+ }
+ } else {
+ return true; // wakeup
+ }
+ }
+ }
+
+ bool Block(ui64 timeoutNs, TPoolId& result) {
+#ifdef _linux_
+ timespec timeout;
+ timeout.tv_sec = timeoutNs / 1'000'000'000;
+ timeout.tv_nsec = timeoutNs % 1'000'000'000;
+ SysFutex(Futex(), FUTEX_WAIT_PRIVATE, CpuBlocked, &timeout, nullptr, 0);
+#else
+ NanoSleep(timeoutNs); // non-linux wake is not supported, cpu will go idle on wake after blocked state
+#endif
+ TAtomicBase state = AtomicLoad(&State);
+ TPoolId current(state & CurrentMask);
+ if (current == CpuBlocked) {
+ return false; // timeout
+ } else {
+ result = current;
+ return true; // wakeup
+ }
+ }
+
+ enum EWakeResult {
+ Woken, // successfully woken up
+ NotIdle, // cpu is already not idle
+ Forbidden, // cpu is assigned to another pool
+ Stopped, // cpu is shutdown
+ };
+
+ EWakeResult WakeWithoutToken(TPoolId pool) {
+ while (true) {
+ TAtomicBase state = RelaxedLoad(&State);
+ TPoolId current(state & CurrentMask);
+ TPoolId assigned((state & AssignedMask) >> AssignedOffs);
+ if (assigned == CpuShared || assigned == pool) {
+ if (current == CpuSpinning) {
+ if (AtomicCas(&State, (state & ~CurrentMask) | pool, state)) {
+ return Woken;
+ }
+ } else if (current == CpuBlocked) {
+ if (AtomicCas(&State, (state & ~CurrentMask) | pool, state)) {
+ FutexWake();
+ return Woken;
+ }
+ } else if (current == CpuStopped) {
+ return Stopped;
+ } else {
+ return NotIdle;
+ }
+ } else {
+ return Forbidden;
+ }
+ }
+ }
+
+ EWakeResult WakeWithTokenAcquired(TPoolId token) {
+ while (true) {
+ TAtomicBase state = RelaxedLoad(&State);
+ TPoolId current(state & CurrentMask);
+ // NOTE: We ignore the assigned value because we already hold a token,
+ // NOTE: so a pool other than the assigned one may run here. This is
+ // NOTE: corrected once the current activation finishes
+ if (current == CpuSpinning) {
+ if (AtomicCas(&State, (state & ~CurrentMask) | token, state)) {
+ return Woken;
+ }
+ } else if (current == CpuBlocked) {
+ if (AtomicCas(&State, (state & ~CurrentMask) | token, state)) {
+ FutexWake();
+ return Woken;
+ }
+ } else if (current == CpuStopped) {
+ return Stopped;
+ } else {
+ return NotIdle;
+ }
+ }
+ }
+
+ bool IsPoolReassigned(TPoolId current) const {
+ TAtomicBase state = AtomicLoad(&State);
+ TPoolId assigned((state & AssignedMask) >> AssignedOffs);
+ return assigned != current;
+ }
+
+ private:
+ void* Futex() {
+ return (void*)&State; // little endian assumed
+ }
+
+ void FutexWake() {
+#ifdef _linux_
+ SysFutex(Futex(), FUTEX_WAKE_PRIVATE, 1, nullptr, nullptr, 0);
+#endif
+ }
+ };
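+
+ // Illustrative worker wait-loop over TCpuState (a sketch for reference only;
+ // the names `cpu` and `timeoutNs` are assumptions, not call sites from this file):
+ //   if (cpu.StartSpinning()) {
+ //       // ... spin for a while, polling pools for work ...
+ //       if (cpu.StartBlocking()) {
+ //           // already woken while spinning; re-check the pools
+ //       } else {
+ //           TPoolId pool;
+ //           if (cpu.Block(timeoutNs, pool)) {
+ //               // woken up to serve `pool`
+ //           } // else: timed out while blocked
+ //       }
+ //   }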
+
+}
diff --git a/library/cpp/actors/core/defs.h b/library/cpp/actors/core/defs.h
new file mode 100644
index 0000000000..980b7d767b
--- /dev/null
+++ b/library/cpp/actors/core/defs.h
@@ -0,0 +1,69 @@
+#pragma once
+
+// unique tag to fix pragma once gcc gluing: ./library/actorlib/core/defs.h
+
+#include <library/cpp/actors/util/defs.h>
+#include <util/generic/hash.h>
+#include <util/string/printf.h>
+
+// Enables collection of
+// event send/receive counts
+// activation time histograms
+// event processing time histograms
+#define ACTORSLIB_COLLECT_EXEC_STATS
+
+namespace NActors {
+ using TPoolId = ui8;
+ using TPoolsMask = ui64;
+ static constexpr TPoolId PoolBits = 6;
+ static constexpr TPoolId MaxPools = (1 << PoolBits) - 1; // maximum number of pools (poolid=63 is reserved)
+ static constexpr TPoolsMask WaitPoolsFlag = (1ull << MaxPools); // wait-for-slow-workers flag bitmask
+
+ // Special TPoolId values used by TCpuState
+ static constexpr TPoolId CpuSpinning = MaxPools; // fast-worker is actively spinning, no slow-workers
+ static constexpr TPoolId CpuBlocked = MaxPools + 1; // fast-worker is blocked, no slow-workers
+ static constexpr TPoolId CpuStopped = TPoolId(-1); // special value indicating worker should stop
+ static constexpr TPoolId CpuShared = MaxPools; // special value of `assigned` meaning the balancer is disabled and the pool scheduler is used instead
+
+ using TPoolWeight = ui16;
+ static constexpr TPoolWeight MinPoolWeight = 1;
+ static constexpr TPoolWeight DefPoolWeight = 32;
+ static constexpr TPoolWeight MaxPoolWeight = 1024;
+
+ using TWorkerId = ui16;
+ static constexpr TWorkerId WorkerBits = 11;
+ static constexpr TWorkerId MaxWorkers = 1 << WorkerBits;
+
+ using TThreadId = ui64;
+ static constexpr TThreadId UnknownThreadId = ui64(-1);
+
+ struct TMailboxType {
+ enum EType {
+ Inherited = -1, // inherit the mailbox type from the parent actor
+ Simple = 0, // simplest queue under a producer lock; fastest in the no-contention case
+ Revolving = 1, // somewhat outdated; tries to be wait-free, superseded by ReadAsFilled
+ HTSwap = 2, // another simple lock-free queue, suggested for low-contention cases
+ ReadAsFilled = 3, // wait-free queue, suggested for high-contention or latency-critical cases
+ TinyReadAsFilled = 4, // same as ReadAsFilled but with lower overhead
+ //Inplace;
+ //Direct;
+ //Virtual
+ };
+ };
+
+ struct TScopeId : std::pair<ui64, ui64> {
+ using TBase = std::pair<ui64, ui64>;
+ using TBase::TBase;
+ static const TScopeId LocallyGenerated;
+ };
+
+ static inline TString ScopeIdToString(const TScopeId& scopeId) {
+ return Sprintf("<%" PRIu64 ":%" PRIu64 ">", scopeId.first, scopeId.second);
+ }
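+
+ // e.g. ScopeIdToString(TScopeId(1, 2)) yields "<1:2>"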
+
+}
+
+template<>
+struct hash<NActors::TScopeId> : hash<std::pair<ui64, ui64>> {};
+
+class TAffinity;
diff --git a/library/cpp/actors/core/event.cpp b/library/cpp/actors/core/event.cpp
new file mode 100644
index 0000000000..33f8ce2aaf
--- /dev/null
+++ b/library/cpp/actors/core/event.cpp
@@ -0,0 +1,38 @@
+#include "event.h"
+#include "event_pb.h"
+
+namespace NActors {
+
+ const TScopeId TScopeId::LocallyGenerated{
+ Max<ui64>(), Max<ui64>()
+ };
+
+ TIntrusivePtr<TEventSerializedData> IEventHandle::ReleaseChainBuffer() {
+ if (Buffer) {
+ TIntrusivePtr<TEventSerializedData> result;
+ DoSwap(result, Buffer);
+ Event.Reset();
+ return result;
+ }
+ if (Event) {
+ TAllocChunkSerializer serializer;
+ Event->SerializeToArcadiaStream(&serializer);
+ auto chainBuf = serializer.Release(Event->IsExtendedFormat());
+ Event.Reset();
+ return chainBuf;
+ }
+ return new TEventSerializedData;
+ }
+
+ TIntrusivePtr<TEventSerializedData> IEventHandle::GetChainBuffer() {
+ if (Buffer)
+ return Buffer;
+ if (Event) {
+ TAllocChunkSerializer serializer;
+ Event->SerializeToArcadiaStream(&serializer);
+ Buffer = serializer.Release(Event->IsExtendedFormat());
+ return Buffer;
+ }
+ return new TEventSerializedData;
+ }
+}
diff --git a/library/cpp/actors/core/event.h b/library/cpp/actors/core/event.h
new file mode 100644
index 0000000000..6ff02aaf94
--- /dev/null
+++ b/library/cpp/actors/core/event.h
@@ -0,0 +1,344 @@
+#pragma once
+
+#include "defs.h"
+#include "actorid.h"
+#include "callstack.h"
+#include "event_load.h"
+
+#include <library/cpp/actors/wilson/wilson_trace.h>
+
+#include <util/system/hp_timer.h>
+#include <util/generic/maybe.h>
+
+namespace NActors {
+ class TChunkSerializer;
+
+ class ISerializerToStream {
+ public:
+ virtual bool SerializeToArcadiaStream(TChunkSerializer*) const = 0;
+ };
+
+ class IEventBase
+ : TNonCopyable,
+ public ISerializerToStream {
+ public:
+ // actual typing is performed by IEventHandle
+
+ virtual ~IEventBase() {
+ }
+
+ virtual TString ToStringHeader() const = 0;
+ virtual TString ToString() const {
+ return ToStringHeader();
+ }
+ virtual ui32 CalculateSerializedSize() const {
+ return 0;
+ }
+ virtual ui32 Type() const = 0;
+ virtual bool SerializeToArcadiaStream(TChunkSerializer*) const = 0;
+ virtual bool IsSerializable() const = 0;
+ virtual bool IsExtendedFormat() const {
+ return false;
+ }
+ virtual ui32 CalculateSerializedSizeCached() const {
+ return CalculateSerializedSize();
+ }
+ };
+
+ // fat handle
+ class IEventHandle : TNonCopyable {
+ struct TOnNondelivery {
+ TActorId Recipient;
+
+ TOnNondelivery(const TActorId& recipient)
+ : Recipient(recipient)
+ {
+ }
+ };
+
+ public:
+ template <typename TEv>
+ inline TEv* CastAsLocal() const noexcept {
+ auto fits = GetTypeRewrite() == TEv::EventType;
+
+ return fits ? static_cast<TEv*>(Event.Get()) : nullptr;
+ }
+
+ template <typename TEventType>
+ TEventType* Get() {
+ if (Type != TEventType::EventType)
+ Y_FAIL("Event type %" PRIu32 " doesn't match the expected type %" PRIu32, Type, TEventType::EventType);
+
+ if (!Event) {
+ Event.Reset(TEventType::Load(Buffer.Get()));
+ }
+
+ if (Event) {
+ return static_cast<TEventType*>(Event.Get());
+ }
+
+ Y_FAIL("Failed to Load() event type %" PRIu32 " class %s", Type, TypeName<TEventType>().data());
+ }
+
+ template <typename T>
+ TAutoPtr<T> Release() {
+ TAutoPtr<T> x = Get<T>();
+ Y_UNUSED(Event.Release());
+ Buffer.Reset();
+ return x;
+ }
+
+ enum EFlags {
+ FlagTrackDelivery = 1 << 0,
+ FlagForwardOnNondelivery = 1 << 1,
+ FlagSubscribeOnSession = 1 << 2,
+ FlagUseSubChannel = 1 << 3,
+ FlagGenerateUnsureUndelivered = 1 << 4,
+ FlagExtendedFormat = 1 << 5,
+ };
+
+ const ui32 Type;
+ const ui32 Flags;
+ const TActorId Recipient;
+ const TActorId Sender;
+ const ui64 Cookie;
+ const TScopeId OriginScopeId = TScopeId::LocallyGenerated; // filled in when the message is received from Interconnect
+
+ // if set, used by ActorSystem/Interconnect to report tracepoints
+ NWilson::TTraceId TraceId;
+
+ // filled in if the event was fed in by an interconnect session
+ const TActorId InterconnectSession;
+
+#ifdef ACTORSLIB_COLLECT_EXEC_STATS
+ ::NHPTimer::STime SendTime;
+#endif
+
+ static const size_t ChannelBits = 12;
+ static const size_t ChannelShift = (sizeof(ui32) << 3) - ChannelBits;
+
+#ifdef USE_ACTOR_CALLSTACK
+ TCallstack Callstack;
+#endif
+ ui16 GetChannel() const noexcept {
+ return Flags >> ChannelShift;
+ }
+
+ ui64 GetSubChannel() const noexcept {
+ return Flags & FlagUseSubChannel ? Sender.LocalId() : 0ULL;
+ }
+
+ static ui32 MakeFlags(ui32 channel, ui32 flags) {
+ Y_VERIFY(channel < (1 << ChannelBits));
+ Y_VERIFY(flags < (1 << ChannelShift));
+ return (flags | (channel << ChannelShift));
+ }
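+
+ // e.g. MakeFlags(1, FlagTrackDelivery) == (1u << ChannelShift) | FlagTrackDelivery:
+ // the channel occupies the top ChannelBits (12) bits, the flags the remaining 20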
+
+ private:
+ THolder<IEventBase> Event;
+ TIntrusivePtr<TEventSerializedData> Buffer;
+
+ TActorId RewriteRecipient;
+ ui32 RewriteType;
+
+ THolder<TOnNondelivery> OnNondeliveryHolder; // only for local events
+
+ public:
+ void Rewrite(ui32 typeRewrite, TActorId recipientRewrite) {
+ RewriteRecipient = recipientRewrite;
+ RewriteType = typeRewrite;
+ }
+
+ void DropRewrite() {
+ RewriteRecipient = Recipient;
+ RewriteType = Type;
+ }
+
+ const TActorId& GetRecipientRewrite() const {
+ return RewriteRecipient;
+ }
+
+ ui32 GetTypeRewrite() const {
+ return RewriteType;
+ }
+
+ TActorId GetForwardOnNondeliveryRecipient() const {
+ return OnNondeliveryHolder.Get() ? OnNondeliveryHolder->Recipient : TActorId();
+ }
+
+ IEventHandle(const TActorId& recipient, const TActorId& sender, IEventBase* ev, ui32 flags = 0, ui64 cookie = 0,
+ const TActorId* forwardOnNondelivery = nullptr, NWilson::TTraceId traceId = {})
+ : Type(ev->Type())
+ , Flags(flags)
+ , Recipient(recipient)
+ , Sender(sender)
+ , Cookie(cookie)
+ , TraceId(std::move(traceId))
+#ifdef ACTORSLIB_COLLECT_EXEC_STATS
+ , SendTime(0)
+#endif
+ , Event(ev)
+ , RewriteRecipient(Recipient)
+ , RewriteType(Type)
+ {
+ if (forwardOnNondelivery)
+ OnNondeliveryHolder.Reset(new TOnNondelivery(*forwardOnNondelivery));
+ }
+
+ IEventHandle(ui32 type,
+ ui32 flags,
+ const TActorId& recipient,
+ const TActorId& sender,
+ TIntrusivePtr<TEventSerializedData> buffer,
+ ui64 cookie,
+ const TActorId* forwardOnNondelivery = nullptr,
+ NWilson::TTraceId traceId = {})
+ : Type(type)
+ , Flags(flags)
+ , Recipient(recipient)
+ , Sender(sender)
+ , Cookie(cookie)
+ , TraceId(std::move(traceId))
+#ifdef ACTORSLIB_COLLECT_EXEC_STATS
+ , SendTime(0)
+#endif
+ , Buffer(std::move(buffer))
+ , RewriteRecipient(Recipient)
+ , RewriteType(Type)
+ {
+ if (forwardOnNondelivery)
+ OnNondeliveryHolder.Reset(new TOnNondelivery(*forwardOnNondelivery));
+ }
+
+ // Special ctor for events from interconnect.
+ IEventHandle(const TActorId& session,
+ ui32 type,
+ ui32 flags,
+ const TActorId& recipient,
+ const TActorId& sender,
+ TIntrusivePtr<TEventSerializedData> buffer,
+ ui64 cookie,
+ TScopeId originScopeId,
+ NWilson::TTraceId traceId) noexcept
+ : Type(type)
+ , Flags(flags)
+ , Recipient(recipient)
+ , Sender(sender)
+ , Cookie(cookie)
+ , OriginScopeId(originScopeId)
+ , TraceId(std::move(traceId))
+ , InterconnectSession(session)
+#ifdef ACTORSLIB_COLLECT_EXEC_STATS
+ , SendTime(0)
+#endif
+ , Buffer(std::move(buffer))
+ , RewriteRecipient(Recipient)
+ , RewriteType(Type)
+ {
+ }
+
+ TIntrusivePtr<TEventSerializedData> GetChainBuffer();
+ TIntrusivePtr<TEventSerializedData> ReleaseChainBuffer();
+
+ ui32 GetSize() const {
+ if (Buffer) {
+ return Buffer->GetSize();
+ } else if (Event) {
+ return Event->CalculateSerializedSize();
+ } else {
+ return 0;
+ }
+ }
+
+ bool HasBuffer() const {
+ return bool(Buffer);
+ }
+
+ bool HasEvent() const {
+ return bool(Event);
+ }
+
+ IEventBase* GetBase() {
+ if (!Event) {
+ if (!Buffer)
+ return nullptr;
+ else
+ ythrow TWithBackTrace<yexception>() << "don't know how to load the event from buffer";
+ }
+
+ return Event.Get();
+ }
+
+ TAutoPtr<IEventBase> ReleaseBase() {
+ TAutoPtr<IEventBase> x = GetBase();
+ Y_UNUSED(Event.Release());
+ Buffer.Reset();
+ return x;
+ }
+
+ TAutoPtr<IEventHandle> Forward(const TActorId& dest) {
+ if (Event)
+ return new IEventHandle(dest, Sender, Event.Release(), Flags, Cookie, nullptr, std::move(TraceId));
+ else
+ return new IEventHandle(Type, Flags, dest, Sender, Buffer, Cookie, nullptr, std::move(TraceId));
+ }
+
+ TAutoPtr<IEventHandle> ForwardOnNondelivery(ui32 reason, bool unsure = false);
+ };
+
+ template <typename TEventType>
+ class TEventHandle: public IEventHandle {
+ TEventHandle(); // instances of TEventHandle are never created
+ public:
+ TEventType* Get() {
+ return IEventHandle::Get<TEventType>();
+ }
+
+ TAutoPtr<TEventType> Release() {
+ return IEventHandle::Release<TEventType>();
+ }
+ };
+
+ static_assert(sizeof(TEventHandle<IEventBase>) == sizeof(IEventHandle), "expect sizeof(TEventHandle<IEventBase>) == sizeof(IEventHandle)");
+
+ template <typename TEventType, ui32 EventType0>
+ class TEventBase: public IEventBase {
+ public:
+ static constexpr ui32 EventType = EventType0;
+ ui32 Type() const override {
+ return EventType0;
+ }
+ // still abstract
+
+ typedef TEventHandle<TEventType> THandle;
+ typedef TAutoPtr<THandle> TPtr;
+ };
+
+#define DEFINE_SIMPLE_LOCAL_EVENT(eventType, header) \
+ TString ToStringHeader() const override { \
+ return TString(header); \
+ } \
+ bool SerializeToArcadiaStream(NActors::TChunkSerializer*) const override { \
+ Y_FAIL("Local event " #eventType " is not serializable"); \
+ } \
+ static IEventBase* Load(NActors::TEventSerializedData*) { \
+ Y_FAIL("Local event " #eventType " has no load method"); \
+ } \
+ bool IsSerializable() const override { \
+ return false; \
+ }
+
+#define DEFINE_SIMPLE_NONLOCAL_EVENT(eventType, header) \
+ TString ToStringHeader() const override { \
+ return TString(header); \
+ } \
+ bool SerializeToArcadiaStream(NActors::TChunkSerializer*) const override { \
+ return true; \
+ } \
+ static IEventBase* Load(NActors::TEventSerializedData*) { \
+ return new eventType(); \
+ } \
+ bool IsSerializable() const override { \
+ return true; \
+ }
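+
+// Usage sketch (illustrative; the event name and type id are assumptions):
+//   struct TEvMyPing: public NActors::TEventBase<TEvMyPing, MyEventTypeId> {
+//       DEFINE_SIMPLE_NONLOCAL_EVENT(TEvMyPing, "MyLib: Ping")
+//   };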
+}
diff --git a/library/cpp/actors/core/event_load.h b/library/cpp/actors/core/event_load.h
new file mode 100644
index 0000000000..0dab1dd374
--- /dev/null
+++ b/library/cpp/actors/core/event_load.h
@@ -0,0 +1,112 @@
+#pragma once
+
+#include <util/stream/walk.h>
+#include <util/system/types.h>
+#include <util/generic/string.h>
+#include <library/cpp/actors/util/rope.h>
+#include <library/cpp/actors/wilson/wilson_trace.h>
+
+namespace NActors {
+ class IEventHandle;
+
+ struct TConstIoVec {
+ const void* Data;
+ size_t Size;
+ };
+
+ struct TIoVec {
+ void* Data;
+ size_t Size;
+ };
+
+ class TEventSerializedData
+ : public TThrRefBase
+ {
+ TRope Rope;
+ bool ExtendedFormat = false;
+
+ public:
+ TEventSerializedData() = default;
+
+ TEventSerializedData(TRope&& rope, bool extendedFormat)
+ : Rope(std::move(rope))
+ , ExtendedFormat(extendedFormat)
+ {}
+
+ TEventSerializedData(const TEventSerializedData& original, TString extraBuffer)
+ : Rope(original.Rope)
+ , ExtendedFormat(original.ExtendedFormat)
+ {
+ Append(std::move(extraBuffer));
+ }
+
+ TEventSerializedData(TString buffer, bool extendedFormat)
+ : ExtendedFormat(extendedFormat)
+ {
+ Append(std::move(buffer));
+ }
+
+ void SetExtendedFormat() {
+ ExtendedFormat = true;
+ }
+
+ bool IsExtendedFormat() const {
+ return ExtendedFormat;
+ }
+
+ TRope::TConstIterator GetBeginIter() const {
+ return Rope.Begin();
+ }
+
+ size_t GetSize() const {
+ return Rope.GetSize();
+ }
+
+ TString GetString() const {
+ TString result;
+ result.reserve(GetSize());
+ for (auto it = Rope.Begin(); it.Valid(); it.AdvanceToNextContiguousBlock()) {
+ result.append(it.ContiguousData(), it.ContiguousSize());
+ }
+ return result;
+ }
+
+ TRope EraseBack(size_t count) {
+ Y_VERIFY(count <= Rope.GetSize());
+ TRope::TIterator iter = Rope.End();
+ iter -= count;
+ return Rope.Extract(iter, Rope.End());
+ }
+
+ void Append(TRope&& from) {
+ Rope.Insert(Rope.End(), std::move(from));
+ }
+
+ void Append(TString buffer) {
+ if (buffer) {
+ Rope.Insert(Rope.End(), TRope(std::move(buffer)));
+ }
+ }
+ };
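+
+ // Example (illustrative values): building and reading serialized data.
+ //   TIntrusivePtr<TEventSerializedData> data = new TEventSerializedData(TString("abc"), false);
+ //   data->Append(TString("def"));
+ //   Y_VERIFY(data->GetString() == "abcdef");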
+}
+
+class TChainBufWalk : public IWalkInput {
+ TIntrusivePtr<NActors::TEventSerializedData> Buffer;
+ TRope::TConstIterator Iter;
+
+public:
+ TChainBufWalk(TIntrusivePtr<NActors::TEventSerializedData> buffer)
+ : Buffer(std::move(buffer))
+ , Iter(Buffer->GetBeginIter())
+ {}
+
+private:
+ size_t DoUnboundedNext(const void **ptr) override {
+ const size_t size = Iter.ContiguousSize();
+ *ptr = Iter.ContiguousData();
+ if (Iter.Valid()) {
+ Iter.AdvanceToNextContiguousBlock();
+ }
+ return size;
+ }
+};
diff --git a/library/cpp/actors/core/event_local.h b/library/cpp/actors/core/event_local.h
new file mode 100644
index 0000000000..2845aa94dd
--- /dev/null
+++ b/library/cpp/actors/core/event_local.h
@@ -0,0 +1,74 @@
+#pragma once
+
+#include "event.h"
+#include "scheduler_cookie.h"
+#include "event_load.h"
+#include <util/system/type_name.h>
+
+namespace NActors {
+ template <typename TEv, ui32 TEventType>
+ class TEventLocal: public TEventBase<TEv, TEventType> {
+ public:
+ TString ToStringHeader() const override {
+ return TypeName<TEv>();
+ }
+
+ bool SerializeToArcadiaStream(TChunkSerializer* /*serializer*/) const override {
+ Y_FAIL("Serialization of local event %s type %" PRIu32, TypeName<TEv>().data(), TEventType);
+ }
+
+ bool IsSerializable() const override {
+ return false;
+ }
+
+ static IEventBase* Load(TEventSerializedData*) {
+ Y_FAIL("Loading of local event %s type %" PRIu32, TypeName<TEv>().data(), TEventType);
+ }
+ };
+
+ template <typename TEv, ui32 TEventType>
+ class TEventScheduler: public TEventLocal<TEv, TEventType> {
+ public:
+ TSchedulerCookieHolder Cookie;
+
+ TEventScheduler(ISchedulerCookie* cookie)
+ : Cookie(cookie)
+ {
+ }
+ };
+
+ template <ui32 TEventType>
+ class TEventSchedulerEv: public TEventScheduler<TEventSchedulerEv<TEventType>, TEventType> {
+ public:
+ TEventSchedulerEv(ISchedulerCookie* cookie)
+ : TEventScheduler<TEventSchedulerEv<TEventType>, TEventType>(cookie)
+ {
+ }
+ };
+
+ template <typename TEv, ui32 TEventType>
+ class TEventSimple: public TEventBase<TEv, TEventType> {
+ public:
+ TString ToStringHeader() const override {
+ static TString header(TypeName<TEv>());
+ return header;
+ }
+
+ bool SerializeToArcadiaStream(TChunkSerializer* /*serializer*/) const override {
+ static_assert(sizeof(TEv) == sizeof(TEventSimple<TEv, TEventType>), "Descendant should be an empty class");
+ return true;
+ }
+
+ bool IsSerializable() const override {
+ return true;
+ }
+
+ static IEventBase* Load(NActors::TEventSerializedData*) {
+ return new TEv();
+ }
+
+ static IEventBase* Load(const TString&) {
+ return new TEv();
+ }
+ };
+}
diff --git a/library/cpp/actors/core/event_pb.cpp b/library/cpp/actors/core/event_pb.cpp
new file mode 100644
index 0000000000..018ff9ac34
--- /dev/null
+++ b/library/cpp/actors/core/event_pb.cpp
@@ -0,0 +1,223 @@
+#include "event_pb.h"
+
+namespace NActors {
+ bool TRopeStream::Next(const void** data, int* size) {
+ *data = Iter.ContiguousData();
+ *size = Iter.ContiguousSize();
+ if (size_t(*size + TotalByteCount) > Size) {
+ *size = Size - TotalByteCount;
+ Iter += *size;
+ } else if (Iter.Valid()) {
+ Iter.AdvanceToNextContiguousBlock();
+ }
+ TotalByteCount += *size;
+ return *size != 0;
+ }
+
+ void TRopeStream::BackUp(int count) {
+ Y_VERIFY(count <= TotalByteCount);
+ Iter -= count;
+ TotalByteCount -= count;
+ }
+
+ bool TRopeStream::Skip(int count) {
+ if (static_cast<size_t>(TotalByteCount + count) > Size) {
+ count = Size - TotalByteCount;
+ }
+ Iter += count;
+ TotalByteCount += count;
+ return static_cast<size_t>(TotalByteCount) != Size;
+ }
+
+ TCoroutineChunkSerializer::TCoroutineChunkSerializer()
+ : TotalSerializedDataSize(0)
+ , Stack(64 * 1024)
+ , SelfClosure{this, TArrayRef(Stack.Begin(), Stack.End())}
+ , InnerContext(SelfClosure)
+ {}
+
+ TCoroutineChunkSerializer::~TCoroutineChunkSerializer() {
+ CancelFlag = true;
+ Resume();
+ Y_VERIFY(Finished);
+ }
+
+ bool TCoroutineChunkSerializer::AllowsAliasing() const {
+ return true;
+ }
+
+ bool TCoroutineChunkSerializer::Produce(const void *data, size_t size) {
+ Y_VERIFY(size <= SizeRemain);
+ SizeRemain -= size;
+ TotalSerializedDataSize += size;
+
+ if (NumChunks) {
+ auto& last = Chunks[NumChunks - 1];
+ if (last.first + last.second == data) {
+ last.second += size; // just extend the last buffer
+ return true;
+ }
+ }
+
+ if (NumChunks == MaxChunks) {
+ InnerContext.SwitchTo(BufFeedContext);
+ if (CancelFlag || AbortFlag) {
+ return false;
+ }
+ }
+
+ Y_VERIFY(NumChunks < MaxChunks);
+ Chunks[NumChunks++] = {static_cast<const char*>(data), size};
+ return true;
+ }
+
+ bool TCoroutineChunkSerializer::WriteAliasedRaw(const void* data, int size) {
+ Y_VERIFY(size >= 0);
+ while (size) {
+ if (CancelFlag || AbortFlag) {
+ return false;
+ } else if (const size_t bytesToAppend = Min<size_t>(size, SizeRemain)) {
+ if (!Produce(data, bytesToAppend)) {
+ return false;
+ }
+ data = static_cast<const char*>(data) + bytesToAppend;
+ size -= bytesToAppend;
+ } else {
+ InnerContext.SwitchTo(BufFeedContext);
+ }
+ }
+ return true;
+ }
+
+ bool TCoroutineChunkSerializer::Next(void** data, int* size) {
+ if (CancelFlag || AbortFlag) {
+ return false;
+ }
+ if (!SizeRemain) {
+ InnerContext.SwitchTo(BufFeedContext);
+ if (CancelFlag || AbortFlag) {
+ return false;
+ }
+ }
+ Y_VERIFY(SizeRemain);
+ *data = BufferPtr;
+ *size = SizeRemain;
+ BufferPtr += SizeRemain;
+ return Produce(*data, *size);
+ }
+
+ void TCoroutineChunkSerializer::BackUp(int count) {
+ if (!count) {
+ return;
+ }
+ Y_VERIFY(count > 0);
+ Y_VERIFY(NumChunks);
+ TChunk& buf = Chunks[NumChunks - 1];
+ Y_VERIFY((size_t)count <= buf.second);
+ Y_VERIFY(buf.first + buf.second == BufferPtr);
+ buf.second -= count;
+ if (!buf.second) {
+ --NumChunks;
+ }
+ BufferPtr -= count;
+ SizeRemain += count;
+ TotalSerializedDataSize -= count;
+ }
+
+ void TCoroutineChunkSerializer::Resume() {
+ TContMachineContext feedContext;
+ BufFeedContext = &feedContext;
+ feedContext.SwitchTo(&InnerContext);
+ BufFeedContext = nullptr;
+ }
+
+ bool TCoroutineChunkSerializer::WriteRope(const TRope *rope) {
+ for (auto iter = rope->Begin(); iter.Valid(); iter.AdvanceToNextContiguousBlock()) {
+ if (!WriteAliasedRaw(iter.ContiguousData(), iter.ContiguousSize())) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ bool TCoroutineChunkSerializer::WriteString(const TString *s) {
+ return WriteAliasedRaw(s->data(), s->length());
+ }
+
+ std::pair<TCoroutineChunkSerializer::TChunk*, TCoroutineChunkSerializer::TChunk*> TCoroutineChunkSerializer::FeedBuf(void* data, size_t size) {
+ // fill in base params
+ BufferPtr = static_cast<char*>(data);
+ SizeRemain = size;
+
+ // transfer control to the coroutine
+ Y_VERIFY(Event);
+ NumChunks = 0;
+ Resume();
+
+ return {Chunks, Chunks + NumChunks};
+ }
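+
+ // Typical driver loop (see event_pb_ut.cpp for a complete example):
+ //   serializer.SetSerializingEvent(&event);
+ //   while (!serializer.IsComplete()) {
+ //       auto range = serializer.FeedBuf(buf, sizeof(buf));
+ //       // consume the chunks in [range.first, range.second)
+ //   }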
+
+ void TCoroutineChunkSerializer::SetSerializingEvent(const IEventBase *event) {
+ Y_VERIFY(Event == nullptr);
+ Event = event;
+ TotalSerializedDataSize = 0;
+ AbortFlag = false;
+ }
+
+ void TCoroutineChunkSerializer::Abort() {
+ Y_VERIFY(Event);
+ AbortFlag = true;
+ Resume();
+ }
+
+ void TCoroutineChunkSerializer::DoRun() {
+ while (!CancelFlag) {
+ Y_VERIFY(Event);
+ SerializationSuccess = Event->SerializeToArcadiaStream(this);
+ Event = nullptr;
+ if (!CancelFlag) { // cancel flag may have been received during serialization
+ InnerContext.SwitchTo(BufFeedContext);
+ }
+ }
+ Finished = true;
+ InnerContext.SwitchTo(BufFeedContext);
+ }
+
+ bool TAllocChunkSerializer::Next(void** pdata, int* psize) {
+ if (Backup) {
+ // we have some data in the backup rope -- move its first chunk to the buffer and return
+ // a pointer to the buffer; it is safe to remove 'const' here as we uniquely own this buffer
+ TRope::TIterator iter = Backup.Begin();
+ *pdata = const_cast<char*>(iter.ContiguousData());
+ *psize = iter.ContiguousSize();
+ iter.AdvanceToNextContiguousBlock();
+ Buffers->Append(Backup.Extract(Backup.Begin(), iter));
+ } else {
+ // no backup buffer, so we have to create a new one
+ auto item = TRopeAlignedBuffer::Allocate(4096);
+ *pdata = item->GetBuffer();
+ *psize = item->GetCapacity();
+ Buffers->Append(TRope(std::move(item)));
+ }
+ return true;
+ }
+
+ void TAllocChunkSerializer::BackUp(int count) {
+ Backup.Insert(Backup.Begin(), Buffers->EraseBack(count));
+ }
+
+ bool TAllocChunkSerializer::WriteAliasedRaw(const void*, int) {
+ Y_VERIFY(false);
+ return false;
+ }
+
+ bool TAllocChunkSerializer::WriteRope(const TRope *rope) {
+ Buffers->Append(TRope(*rope));
+ return true;
+ }
+
+ bool TAllocChunkSerializer::WriteString(const TString *s) {
+ Buffers->Append(*s);
+ return true;
+ }
+}
diff --git a/library/cpp/actors/core/event_pb.h b/library/cpp/actors/core/event_pb.h
new file mode 100644
index 0000000000..d7546b901a
--- /dev/null
+++ b/library/cpp/actors/core/event_pb.h
@@ -0,0 +1,500 @@
+#pragma once
+
+#include "event.h"
+#include "event_load.h"
+
+#include <google/protobuf/io/zero_copy_stream.h>
+#include <google/protobuf/arena.h>
+#include <library/cpp/actors/protos/actors.pb.h>
+#include <util/generic/deque.h>
+#include <util/system/context.h>
+#include <util/system/filemap.h>
+#include <array>
+
+namespace NActors {
+
+ class TRopeStream : public NProtoBuf::io::ZeroCopyInputStream {
+ TRope::TConstIterator Iter;
+ const size_t Size;
+
+ public:
+ TRopeStream(TRope::TConstIterator iter, size_t size)
+ : Iter(iter)
+ , Size(size)
+ {}
+
+ bool Next(const void** data, int* size) override;
+ void BackUp(int count) override;
+ bool Skip(int count) override;
+ int64_t ByteCount() const override {
+ return TotalByteCount;
+ }
+
+ private:
+ int64_t TotalByteCount = 0;
+ };
+
+ class TChunkSerializer : public NProtoBuf::io::ZeroCopyOutputStream {
+ public:
+ TChunkSerializer() = default;
+ virtual ~TChunkSerializer() = default;
+
+ virtual bool WriteRope(const TRope *rope) = 0;
+ virtual bool WriteString(const TString *s) = 0;
+ };
+
+ class TAllocChunkSerializer final : public TChunkSerializer {
+ public:
+ bool Next(void** data, int* size) override;
+ void BackUp(int count) override;
+ int64_t ByteCount() const override {
+ return Buffers->GetSize();
+ }
+ bool WriteAliasedRaw(const void* data, int size) override;
+
+ // WARNING: these methods require the owner to retain ownership and immutability of the passed objects
+ bool WriteRope(const TRope *rope) override;
+ bool WriteString(const TString *s) override;
+
+ inline TIntrusivePtr<TEventSerializedData> Release(bool extendedFormat) {
+ if (extendedFormat) {
+ Buffers->SetExtendedFormat();
+ }
+ return std::move(Buffers);
+ }
+
+ protected:
+ TIntrusivePtr<TEventSerializedData> Buffers = new TEventSerializedData;
+ TRope Backup;
+ };
+
+ class TCoroutineChunkSerializer final : public TChunkSerializer, protected ITrampoLine {
+ public:
+ using TChunk = std::pair<const char*, size_t>;
+
+ TCoroutineChunkSerializer();
+ ~TCoroutineChunkSerializer();
+
+ void SetSerializingEvent(const IEventBase *event);
+ void Abort();
+ std::pair<TChunk*, TChunk*> FeedBuf(void* data, size_t size);
+ bool IsComplete() const {
+ return !Event;
+ }
+ bool IsSuccessfull() const {
+ return SerializationSuccess;
+ }
+ const IEventBase *GetCurrentEvent() const {
+ return Event;
+ }
+
+ bool Next(void** data, int* size) override;
+ void BackUp(int count) override;
+ int64_t ByteCount() const override {
+ return TotalSerializedDataSize;
+ }
+ bool WriteAliasedRaw(const void* data, int size) override;
+ bool AllowsAliasing() const override;
+
+ bool WriteRope(const TRope *rope) override;
+ bool WriteString(const TString *s) override;
+
+ protected:
+ void DoRun() override;
+ void Resume();
+ bool Produce(const void *data, size_t size);
+
+ i64 TotalSerializedDataSize;
+ TMappedAllocation Stack;
+ TContClosure SelfClosure;
+ TContMachineContext InnerContext;
+ TContMachineContext *BufFeedContext = nullptr;
+ char *BufferPtr;
+ size_t SizeRemain;
+ static constexpr size_t MaxChunks = 16;
+ TChunk Chunks[MaxChunks];
+ size_t NumChunks = 0;
+ const IEventBase *Event = nullptr;
+ bool CancelFlag = false;
+ bool AbortFlag;
+ bool SerializationSuccess;
+ bool Finished = false;
+ };
+
+#ifdef ACTORLIB_HUGE_PB_SIZE
+ static const size_t EventMaxByteSize = 140 << 20; // (140MB)
+#else
+ static const size_t EventMaxByteSize = 67108000; // slightly under 64 MiB
+#endif
+
+ template <typename TEv, typename TRecord /*protobuf record*/, ui32 TEventType, typename TRecHolder>
+ class TEventPBBase: public TEventBase<TEv, TEventType> , public TRecHolder {
+ // a vector of data buffers referenced by the record; if non-empty, the extended serialization mechanism applies
+ TVector<TRope> Payload;
+
+ public:
+ using TRecHolder::Record;
+
+ public:
+ using ProtoRecordType = TRecord;
+
+ TEventPBBase() = default;
+
+ explicit TEventPBBase(const TRecord& rec)
+ {
+ Record = rec;
+ }
+
+ explicit TEventPBBase(TRecord&& rec)
+ {
+ Record = std::move(rec);
+ }
+
+ TString ToStringHeader() const override {
+ return Record.GetTypeName();
+ }
+
+ TString ToString() const override {
+ return Record.ShortDebugString();
+ }
+
+ bool IsSerializable() const override {
+ return true;
+ }
+
+ bool IsExtendedFormat() const override {
+ return static_cast<bool>(Payload);
+ }
+
+ bool SerializeToArcadiaStream(TChunkSerializer* chunker) const override {
+ // serialize payload first
+ if (Payload) {
+ void *data;
+ int size = 0;
+ auto append = [&](const char *p, size_t len) {
+ while (len) {
+ if (size) {
+ const size_t numBytesToCopy = std::min<size_t>(size, len);
+ memcpy(data, p, numBytesToCopy);
+ data = static_cast<char*>(data) + numBytesToCopy;
+ size -= numBytesToCopy;
+ p += numBytesToCopy;
+ len -= numBytesToCopy;
+ } else if (!chunker->Next(&data, &size)) {
+ return false;
+ }
+ }
+ return true;
+ };
+ auto appendNumber = [&](size_t number) {
+ char buf[MaxNumberBytes];
+ return append(buf, SerializeNumber(number, buf));
+ };
+ char marker = PayloadMarker;
+ append(&marker, 1);
+ if (!appendNumber(Payload.size())) {
+ return false;
+ }
+ for (const TRope& rope : Payload) {
+ if (!appendNumber(rope.GetSize())) {
+ return false;
+ }
+ if (rope) {
+ if (size) {
+ chunker->BackUp(std::exchange(size, 0));
+ }
+ if (!chunker->WriteRope(&rope)) {
+ return false;
+ }
+ }
+ }
+ if (size) {
+ chunker->BackUp(size);
+ }
+ }
+
+ return Record.SerializeToZeroCopyStream(chunker);
+ }
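+
+ // Extended wire format produced above (and parsed back by Load below):
+ //   [0x07 marker][varint payload count]([varint rope size][rope bytes])...[protobuf record]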
+
+ ui32 CalculateSerializedSize() const override {
+ ssize_t result = Record.ByteSize();
+ if (result >= 0 && Payload) {
+ ++result; // marker
+ char buf[MaxNumberBytes];
+ result += SerializeNumber(Payload.size(), buf);
+ for (const TRope& rope : Payload) {
+ result += SerializeNumber(rope.GetSize(), buf);
+ result += rope.GetSize();
+ }
+ }
+ return result;
+ }
+
+ static IEventBase* Load(TIntrusivePtr<TEventSerializedData> input) {
+ THolder<TEventPBBase> ev(new TEv());
+ if (!input->GetSize()) {
+ Y_PROTOBUF_SUPPRESS_NODISCARD ev->Record.ParseFromString(TString());
+ } else {
+ TRope::TConstIterator iter = input->GetBeginIter();
+ ui64 size = input->GetSize();
+
+ if (input->IsExtendedFormat()) {
+ // check marker
+ if (!iter.Valid() || *iter.ContiguousData() != PayloadMarker) {
+ Y_FAIL("invalid event");
+ }
+ // skip marker
+ iter += 1;
+ --size;
+ // parse number of payload ropes
+ size_t numRopes = DeserializeNumber(iter, size);
+ if (numRopes == Max<size_t>()) {
+ Y_FAIL("invalid event");
+ }
+ while (numRopes--) {
+ // parse length of the rope
+ const size_t len = DeserializeNumber(iter, size);
+ if (len == Max<size_t>() || size < len) {
+ Y_FAIL("invalid event len# %zu size# %" PRIu64, len, size);
+ }
+ // extract the rope
+ TRope::TConstIterator begin = iter;
+ iter += len;
+ size -= len;
+ ev->Payload.emplace_back(begin, iter);
+ }
+ }
+
+ // parse the protobuf
+ TRopeStream stream(iter, size);
+ if (!ev->Record.ParseFromZeroCopyStream(&stream)) {
+ Y_FAIL("Failed to parse protobuf event type %" PRIu32 " class %s", TEventType, TypeName(ev->Record).data());
+ }
+ }
+ ev->CachedByteSize = input->GetSize();
+ return ev.Release();
+ }
+
+ size_t GetCachedByteSize() const {
+ if (CachedByteSize == 0) {
+ CachedByteSize = CalculateSerializedSize();
+ }
+ return CachedByteSize;
+ }
+
+ ui32 CalculateSerializedSizeCached() const override {
+ return GetCachedByteSize();
+ }
+
+ void InvalidateCachedByteSize() {
+ CachedByteSize = 0;
+ }
+
+ public:
+ void ReservePayload(size_t size) {
+ Payload.reserve(size);
+ }
+
+ ui32 AddPayload(TRope&& rope) {
+ const ui32 id = Payload.size();
+ Payload.push_back(std::move(rope));
+ InvalidateCachedByteSize();
+ return id;
+ }
+
+ const TRope& GetPayload(ui32 id) const {
+ Y_VERIFY(id < Payload.size());
+ return Payload[id];
+ }
+
+ ui32 GetPayloadCount() const {
+ return Payload.size();
+ }
+
+ void StripPayload() {
+ Payload.clear();
+ }
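+
+ // Usage sketch (illustrative; storing the returned id in a protobuf field,
+ // as the unit tests do, is a convention of the caller, not of this API):
+ //   const ui32 id = ev->AddPayload(TRope(std::move(data)));
+ //   ev->Record.AddPayloadId(id);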
+
+ protected:
+ mutable size_t CachedByteSize = 0;
+
+ static constexpr char PayloadMarker = 0x07;
+ static constexpr size_t MaxNumberBytes = (sizeof(size_t) * CHAR_BIT + 6) / 7;
+
+ static size_t SerializeNumber(size_t num, char *buffer) {
+ char *begin = buffer;
+ do {
+ *buffer++ = (num & 0x7F) | (num >= 128 ? 0x80 : 0x00);
+ num >>= 7;
+ } while (num);
+ return buffer - begin;
+ }
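+
+ // e.g. SerializeNumber(300, buf) writes {0xAC, 0x02} (LEB128-style, 7 bits per byte) and returns 2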
+
+ static size_t DeserializeNumber(const char **ptr, const char *end) {
+ const char *p = *ptr;
+ size_t res = 0;
+ size_t offset = 0;
+ for (;;) {
+ if (p == end) {
+ return Max<size_t>();
+ }
+ const char byte = *p++;
+ res |= (static_cast<size_t>(byte) & 0x7F) << offset;
+ offset += 7;
+ if (!(byte & 0x80)) {
+ break;
+ }
+ }
+ *ptr = p;
+ return res;
+ }
+
+ static size_t DeserializeNumber(TRope::TConstIterator& iter, ui64& size) {
+ size_t res = 0;
+ size_t offset = 0;
+ for (;;) {
+ if (!iter.Valid()) {
+ return Max<size_t>();
+ }
+ const char byte = *iter.ContiguousData();
+ iter += 1;
+ --size;
+ res |= (static_cast<size_t>(byte) & 0x7F) << offset;
+ offset += 7;
+ if (!(byte & 0x80)) {
+ break;
+ }
+ }
+ return res;
+ }
+ };
+
+ // Protobuf record not using arena
+ template <typename TRecord>
+ struct TRecordHolder {
+ TRecord Record;
+ };
+
+ // Protobuf arena and a record allocated on it
+ template <typename TRecord, size_t InitialBlockSize, size_t MaxBlockSize>
+ struct TArenaRecordHolder {
+ google::protobuf::Arena PbArena;
+ TRecord& Record;
+
+ static const google::protobuf::ArenaOptions GetArenaOptions() {
+ google::protobuf::ArenaOptions opts;
+ opts.initial_block_size = InitialBlockSize;
+ opts.max_block_size = MaxBlockSize;
+ return opts;
+ }
+
+ TArenaRecordHolder()
+ : PbArena(GetArenaOptions())
+ , Record(*google::protobuf::Arena::CreateMessage<TRecord>(&PbArena))
+ {}
+ };
+
+ template <typename TEv, typename TRecord, ui32 TEventType>
+ class TEventPB : public TEventPBBase<TEv, TRecord, TEventType, TRecordHolder<TRecord> > {
+ typedef TEventPBBase<TEv, TRecord, TEventType, TRecordHolder<TRecord> > TPbBase;
+ // NOTE: No extra fields allowed: TEventPB must be a "template typedef"
+ public:
+ using TPbBase::TPbBase;
+ };
+
+ template <typename TEv, typename TRecord, ui32 TEventType, size_t InitialBlockSize = 512, size_t MaxBlockSize = 16*1024>
+ using TEventPBWithArena = TEventPBBase<TEv, TRecord, TEventType, TArenaRecordHolder<TRecord, InitialBlockSize, MaxBlockSize> >;
+
+ template <typename TEv, typename TRecord, ui32 TEventType>
+ class TEventShortDebugPB: public TEventPB<TEv, TRecord, TEventType> {
+ public:
+ using TBase = TEventPB<TEv, TRecord, TEventType>;
+ TEventShortDebugPB() = default;
+ explicit TEventShortDebugPB(const TRecord& rec)
+ : TBase(rec)
+ {
+ }
+ explicit TEventShortDebugPB(TRecord&& rec)
+ : TBase(std::move(rec))
+ {
+ }
+ TString ToString() const override {
+ return TypeName<TEv>() + " { " + TBase::Record.ShortDebugString() + " }";
+ }
+ };
+
+ template <typename TEv, typename TRecord, ui32 TEventType>
+ class TEventPreSerializedPB: public TEventPB<TEv, TRecord, TEventType> {
+ protected:
+ using TBase = TEventPB<TEv, TRecord, TEventType>;
+ using TSelf = TEventPreSerializedPB<TEv, TRecord, TEventType>;
+ using TBase::Record;
+
+ public:
+ TString PreSerializedData; // already serialized PB data (using message::SerializeToString)
+
+ TEventPreSerializedPB() = default;
+
+ explicit TEventPreSerializedPB(const TRecord& rec)
+ : TBase(rec)
+ {
+ }
+
+ explicit TEventPreSerializedPB(TRecord&& rec)
+ : TBase(std::move(rec))
+ {
+ }
+
+ // when a remote event is received locally, this method merges the pre-serialized data into the record
+ const TRecord& GetRecord() {
+ TRecord& base(TBase::Record);
+ if (!PreSerializedData.empty()) {
+ TRecord copy;
+ Y_PROTOBUF_SUPPRESS_NODISCARD copy.ParseFromString(PreSerializedData);
+ copy.MergeFrom(base);
+ base.Swap(&copy);
+ PreSerializedData.clear();
+ }
+ return TBase::Record;
+ }
+
+ const TRecord& GetRecord() const {
+ return const_cast<TSelf*>(this)->GetRecord();
+ }
+
+ TRecord* MutableRecord() {
+ GetRecord(); // Make sure PreSerializedData is parsed
+ return &(TBase::Record);
+ }
+
+ TString ToString() const override {
+ return GetRecord().ShortDebugString();
+ }
+
+ bool SerializeToArcadiaStream(TChunkSerializer* chunker) const override {
+ return chunker->WriteString(&PreSerializedData) && TBase::SerializeToArcadiaStream(chunker);
+ }
+
+ ui32 CalculateSerializedSize() const override {
+ return PreSerializedData.size() + TBase::CalculateSerializedSize();
+ }
+
+ size_t GetCachedByteSize() const {
+ return PreSerializedData.size() + TBase::GetCachedByteSize();
+ }
+
+ ui32 CalculateSerializedSizeCached() const override {
+ return GetCachedByteSize();
+ }
+ };
+
+ inline TActorId ActorIdFromProto(const NActorsProto::TActorId& actorId) {
+ return TActorId(actorId.GetRawX1(), actorId.GetRawX2());
+ }
+
+ inline void ActorIdToProto(const TActorId& src, NActorsProto::TActorId* dest) {
+ Y_VERIFY_DEBUG(dest);
+ dest->SetRawX1(src.RawX1());
+ dest->SetRawX2(src.RawX2());
+ }
+}
diff --git a/library/cpp/actors/core/event_pb_payload_ut.cpp b/library/cpp/actors/core/event_pb_payload_ut.cpp
new file mode 100644
index 0000000000..eab007bc15
--- /dev/null
+++ b/library/cpp/actors/core/event_pb_payload_ut.cpp
@@ -0,0 +1,154 @@
+#include "event_pb.h"
+#include "events.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+#include <library/cpp/actors/protos/unittests.pb.h>
+
+using namespace NActors;
+
+enum {
+ EvMessageWithPayload = EventSpaceBegin(TEvents::ES_PRIVATE),
+ EvArenaMessage,
+ EvArenaMessageBig,
+ EvMessageWithPayloadPreSerialized
+};
+
+struct TEvMessageWithPayload : TEventPB<TEvMessageWithPayload, TMessageWithPayload, EvMessageWithPayload> {
+ TEvMessageWithPayload() = default;
+ explicit TEvMessageWithPayload(const TMessageWithPayload& p)
+ : TEventPB<TEvMessageWithPayload, TMessageWithPayload, EvMessageWithPayload>(p)
+ {}
+};
+
+struct TEvMessageWithPayloadPreSerialized : TEventPreSerializedPB<TEvMessageWithPayloadPreSerialized, TMessageWithPayload, EvMessageWithPayloadPreSerialized> {
+};
+
+
+TRope MakeStringRope(const TString& message) {
+ return message ? TRope(message) : TRope();
+}
+
+TString MakeString(size_t len) {
+ TString res;
+ for (size_t i = 0; i < len; ++i) {
+ res += RandomNumber<char>();
+ }
+ return res;
+}
+
+Y_UNIT_TEST_SUITE(TEventProtoWithPayload) {
+
+ template <class TEventFrom, class TEventTo>
+ void TestSerializeDeserialize(size_t size1, size_t size2) {
+ static_assert(TEventFrom::EventType == TEventTo::EventType, "Must be same event type");
+
+ TEventFrom msg;
+ msg.Record.SetMeta("hello, world!");
+ msg.Record.AddPayloadId(msg.AddPayload(MakeStringRope(MakeString(size1))));
+ msg.Record.AddPayloadId(msg.AddPayload(MakeStringRope(MakeString(size2))));
+ msg.Record.AddSomeData(MakeString((size1 + size2) % 50 + 11));
+
+ auto serializer = MakeHolder<TAllocChunkSerializer>();
+ msg.SerializeToArcadiaStream(serializer.Get());
+ auto buffers = serializer->Release(msg.IsExtendedFormat());
+ UNIT_ASSERT_VALUES_EQUAL(buffers->GetSize(), msg.CalculateSerializedSize());
+ TString ser = buffers->GetString();
+
+ TString chunkerRes;
+ TCoroutineChunkSerializer chunker;
+ chunker.SetSerializingEvent(&msg);
+ while (!chunker.IsComplete()) {
+ char buffer[4096];
+ auto range = chunker.FeedBuf(buffer, sizeof(buffer));
+ for (auto p = range.first; p != range.second; ++p) {
+ chunkerRes += TString(p->first, p->second);
+ }
+ }
+ UNIT_ASSERT_VALUES_EQUAL(chunkerRes, ser);
+
+ THolder<IEventBase> ev2 = THolder(TEventTo::Load(buffers));
+ TEventTo& msg2 = static_cast<TEventTo&>(*ev2);
+ UNIT_ASSERT_VALUES_EQUAL(msg2.Record.GetMeta(), msg.Record.GetMeta());
+ UNIT_ASSERT_EQUAL(msg2.GetPayload(msg2.Record.GetPayloadId(0)), msg.GetPayload(msg.Record.GetPayloadId(0)));
+ UNIT_ASSERT_EQUAL(msg2.GetPayload(msg2.Record.GetPayloadId(1)), msg.GetPayload(msg.Record.GetPayloadId(1)));
+ }
+
+ template <class TEvent>
+ void TestAllSizes(size_t step1 = 100, size_t step2 = 111) {
+ for (size_t size1 = 0; size1 < 10000; size1 += step1) {
+ for (size_t size2 = 0; size2 < 10000; size2 += step2) {
+ TestSerializeDeserialize<TEvent, TEvent>(size1, size2);
+ }
+ }
+ }
+
+#if (!defined(_tsan_enabled_))
+ Y_UNIT_TEST(SerializeDeserialize) {
+ TestAllSizes<TEvMessageWithPayload>();
+ }
+#endif
+
+
+ struct TEvArenaMessage : TEventPBWithArena<TEvArenaMessage, TMessageWithPayload, EvArenaMessage> {
+ };
+
+ Y_UNIT_TEST(SerializeDeserializeArena) {
+ TestAllSizes<TEvArenaMessage>(500, 111);
+ }
+
+
+ struct TEvArenaMessageBig : TEventPBWithArena<TEvArenaMessageBig, TMessageWithPayload, EvArenaMessageBig, 4000, 32000> {
+ };
+
+ Y_UNIT_TEST(SerializeDeserializeArenaBig) {
+ TestAllSizes<TEvArenaMessageBig>(111, 500);
+ }
+
+
+ // Compatible with TEvArenaMessage but doesn't use arenas
+ struct TEvArenaMessageWithoutArena : TEventPB<TEvArenaMessageWithoutArena, TMessageWithPayload, EvArenaMessage> {
+ };
+
+ Y_UNIT_TEST(Compatibility) {
+ TestSerializeDeserialize<TEvArenaMessage, TEvArenaMessageWithoutArena>(200, 14010);
+ TestSerializeDeserialize<TEvArenaMessageWithoutArena, TEvArenaMessage>(2000, 4010);
+ }
+
+ Y_UNIT_TEST(PreSerializedCompatibility) {
+ // ensure TEventPreSerializedPB and TEventPB are interchangeable with no compatibility issues
+ TMessageWithPayload msg;
+ msg.SetMeta("hello, world!");
+ msg.AddPayloadId(123);
+ msg.AddPayloadId(999);
+ msg.AddSomeData("abc");
+ msg.AddSomeData("xyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz");
+
+ TEvMessageWithPayloadPreSerialized e1;
+ Y_PROTOBUF_SUPPRESS_NODISCARD msg.SerializeToString(&e1.PreSerializedData);
+
+ auto serializer1 = MakeHolder<TAllocChunkSerializer>();
+ e1.SerializeToArcadiaStream(serializer1.Get());
+ auto buffers1 = serializer1->Release(e1.IsExtendedFormat());
+ UNIT_ASSERT_VALUES_EQUAL(buffers1->GetSize(), e1.CalculateSerializedSize());
+ TString ser1 = buffers1->GetString();
+
+ TEvMessageWithPayload e2(msg);
+ auto serializer2 = MakeHolder<TAllocChunkSerializer>();
+ e2.SerializeToArcadiaStream(serializer2.Get());
+ auto buffers2 = serializer2->Release(e2.IsExtendedFormat());
+ UNIT_ASSERT_VALUES_EQUAL(buffers2->GetSize(), e2.CalculateSerializedSize());
+ TString ser2 = buffers2->GetString();
+ UNIT_ASSERT_VALUES_EQUAL(ser1, ser2);
+
+ // deserialize
+ auto data = MakeIntrusive<TEventSerializedData>(ser1, false);
+ THolder<TEvMessageWithPayloadPreSerialized> parsedEvent(static_cast<TEvMessageWithPayloadPreSerialized*>(TEvMessageWithPayloadPreSerialized::Load(data)));
+ UNIT_ASSERT_VALUES_EQUAL(parsedEvent->PreSerializedData, ""); // this field is empty after deserialization
+ auto& record = parsedEvent->GetRecord();
+ UNIT_ASSERT_VALUES_EQUAL(record.GetMeta(), msg.GetMeta());
+ UNIT_ASSERT_VALUES_EQUAL(record.PayloadIdSize(), msg.PayloadIdSize());
+ UNIT_ASSERT_VALUES_EQUAL(record.PayloadIdSize(), 2);
+ UNIT_ASSERT_VALUES_EQUAL(record.GetPayloadId(0), msg.GetPayloadId(0));
+ UNIT_ASSERT_VALUES_EQUAL(record.GetPayloadId(1), msg.GetPayloadId(1));
+ }
+}
diff --git a/library/cpp/actors/core/event_pb_ut.cpp b/library/cpp/actors/core/event_pb_ut.cpp
new file mode 100644
index 0000000000..a16c3092b3
--- /dev/null
+++ b/library/cpp/actors/core/event_pb_ut.cpp
@@ -0,0 +1,71 @@
+#include "event_pb.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+#include <library/cpp/actors/protos/unittests.pb.h>
+
+Y_UNIT_TEST_SUITE(TEventSerialization) {
+ struct TMockEvent: public NActors::IEventBase {
+ TBigMessage* msg;
+ bool SerializeToArcadiaStream(NActors::TChunkSerializer* chunker) const override {
+ return msg->SerializeToZeroCopyStream(chunker);
+ }
+ bool IsSerializable() const override {
+ return true;
+ }
+ TString ToStringHeader() const override {
+ return TString();
+ }
+ virtual TString Serialize() const {
+ return TString();
+ }
+ ui32 Type() const override {
+ return 0;
+ }
+ };
+
+ Y_UNIT_TEST(Coroutine) {
+ TString strA(507, 'a');
+ TString strB(814, 'b');
+ TString strC(198, 'c');
+
+ TBigMessage bm;
+
+ TSimple* simple0 = bm.AddSimples();
+ simple0->SetStr1(strA);
+ simple0->SetStr2(strB);
+ simple0->SetNumber1(213431324);
+
+ TSimple* simple1 = bm.AddSimples();
+ simple1->SetStr1(strC);
+ simple1->SetStr2(strA);
+ simple1->SetNumber1(21039313);
+
+ bm.AddManyStr(strA);
+ bm.AddManyStr(strC);
+ bm.AddManyStr(strB);
+
+ bm.SetOneMoreStr(strB);
+ bm.SetYANumber(394143);
+
+ TString bmSerialized;
+ Y_PROTOBUF_SUPPRESS_NODISCARD bm.SerializeToString(&bmSerialized);
+ UNIT_ASSERT_UNEQUAL(bmSerialized.size(), 0);
+
+ NActors::TCoroutineChunkSerializer chunker;
+ for (int i = 0; i < 4; ++i) {
+ TMockEvent event;
+ event.msg = &bm;
+ chunker.SetSerializingEvent(&event);
+ char buf1[87];
+ TString bmChunkedSerialized;
+ while (!chunker.IsComplete()) {
+ auto range = chunker.FeedBuf(&buf1[0], sizeof(buf1));
+ for (auto p = range.first; p != range.second; ++p) {
+ bmChunkedSerialized.append(p->first, p->second);
+ }
+ }
+ UNIT_ASSERT_EQUAL(bmSerialized, bmChunkedSerialized);
+ }
+ }
+}
diff --git a/library/cpp/actors/core/events.h b/library/cpp/actors/core/events.h
new file mode 100644
index 0000000000..702cf50fad
--- /dev/null
+++ b/library/cpp/actors/core/events.h
@@ -0,0 +1,222 @@
+#pragma once
+
+#include "event.h"
+#include "event_pb.h"
+
+#include <library/cpp/actors/protos/actors.pb.h>
+#include <util/system/unaligned_mem.h>
+
+namespace NActors {
+ struct TEvents {
+ enum EEventSpace {
+ ES_HELLOWORLD = 0,
+ ES_SYSTEM = 1,
+ ES_INTERCONNECT = 2,
+ ES_INTERCONNECT_MSGBUS = 3,
+ ES_DNS = 4,
+ ES_SOCKET_POLLER = 5,
+ ES_LOGGER = 6,
+ ES_MON = 7,
+ ES_INTERCONNECT_TCP = 8,
+ ES_PROFILER = 9,
+ ES_YF = 10,
+ ES_HTTP = 11,
+
+ ES_USERSPACE = 4096,
+
+ ES_PRIVATE = (1 << 15) - 16,
+ ES_MAX = (1 << 15),
+ };
+
+#define EventSpaceBegin(eventSpace) (eventSpace << 16u)
+#define EventSpaceEnd(eventSpace) ((eventSpace << 16u) + (1u << 16u))
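+
+// e.g. for ES_SYSTEM == 1 the event type ids occupy [0x10000, 0x20000), so TSystem::Start == 0x10000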
+
+ struct THelloWorld {
+ enum {
+ Start = EventSpaceBegin(ES_HELLOWORLD),
+ Ping,
+ Pong,
+ Blob,
+ End
+ };
+
+ static_assert(End < EventSpaceEnd(ES_HELLOWORLD), "expect End < EventSpaceEnd(ES_HELLOWORLD)");
+ };
+
+ struct TEvPing: public TEventBase<TEvPing, THelloWorld::Ping> {
+ DEFINE_SIMPLE_NONLOCAL_EVENT(TEvPing, "HelloWorld: Ping");
+ };
+
+ struct TEvPong: public TEventBase<TEvPong, THelloWorld::Pong> {
+ DEFINE_SIMPLE_NONLOCAL_EVENT(TEvPong, "HelloWorld: Pong");
+ };
+
+ struct TEvBlob: public TEventBase<TEvBlob, THelloWorld::Blob> {
+ const TString Blob;
+
+ TEvBlob(const TString& blob) noexcept
+ : Blob(blob)
+ {
+ }
+
+ TString ToStringHeader() const noexcept override {
+ return "THelloWorld::Blob";
+ }
+
+ bool SerializeToArcadiaStream(TChunkSerializer *serializer) const override {
+ return serializer->WriteString(&Blob);
+ }
+
+ static IEventBase* Load(TEventSerializedData* bufs) noexcept {
+ return new TEvBlob(bufs->GetString());
+ }
+
+ bool IsSerializable() const override {
+ return true;
+ }
+ };
+
+ struct TSystem {
+ enum {
+ Start = EventSpaceBegin(ES_SYSTEM),
+ Bootstrap, // generic bootstrap event
+ Wakeup, // generic timeout
+ Subscribe, // generic subscribe to something
+ Unsubscribe, // generic unsubscribe from something
+ Delivered, // event delivered
+ Undelivered, // event undelivered
+ Poison, // request the actor to shut down
+ Completed, // generic async job result event
+ PoisonTaken, // generic Poison taken (reply to PoisonPill event, i.e. died completely)
+ FlushLog,
+ CallbackCompletion,
+ CallbackException,
+ Gone, // Generic notification of actor death
+ TrackActor,
+ UntrackActor,
+ InvokeResult,
+ CoroTimeout,
+ InvokeQuery,
+ End,
+
+ // Compatibility section
+ PoisonPill = Poison,
+ ActorDied = Gone,
+ };
+
+ static_assert(End < EventSpaceEnd(ES_SYSTEM), "expect End < EventSpaceEnd(ES_SYSTEM)");
+ };
+
+ struct TEvBootstrap: public TEventBase<TEvBootstrap, TSystem::Bootstrap> {
+ DEFINE_SIMPLE_LOCAL_EVENT(TEvBootstrap, "System: TEvBootstrap")
+ };
+
+ struct TEvPoison : public TEventBase<TEvPoison, TSystem::Poison> {
+ DEFINE_SIMPLE_NONLOCAL_EVENT(TEvPoison, "System: TEvPoison")
+ };
+
+ struct TEvWakeup: public TEventBase<TEvWakeup, TSystem::Wakeup> {
+ DEFINE_SIMPLE_LOCAL_EVENT(TEvWakeup, "System: TEvWakeup")
+
+ TEvWakeup(ui64 tag = 0) : Tag(tag) { }
+
+ const ui64 Tag = 0;
+ };
+
+ struct TEvSubscribe: public TEventBase<TEvSubscribe, TSystem::Subscribe> {
+ DEFINE_SIMPLE_LOCAL_EVENT(TEvSubscribe, "System: TEvSubscribe")
+ };
+
+ struct TEvUnsubscribe: public TEventBase<TEvUnsubscribe, TSystem::Unsubscribe> {
+ DEFINE_SIMPLE_LOCAL_EVENT(TEvUnsubscribe, "System: TEvUnsubscribe")
+ };
+
+ struct TEvUndelivered: public TEventBase<TEvUndelivered, TSystem::Undelivered> {
+ enum EReason {
+ ReasonUnknown,
+ ReasonActorUnknown,
+ Disconnected
+ };
+ const ui32 SourceType;
+ const EReason Reason;
+ const bool Unsure;
+ const TString Data;
+
+ TEvUndelivered(ui32 sourceType, ui32 reason, bool unsure = false)
+ : SourceType(sourceType)
+ , Reason(static_cast<EReason>(reason))
+ , Unsure(unsure)
+ , Data(MakeData(sourceType, reason))
+ {}
+
+ TString ToStringHeader() const override;
+ bool SerializeToArcadiaStream(TChunkSerializer *serializer) const override;
+ static IEventBase* Load(TEventSerializedData* bufs);
+ bool IsSerializable() const override;
+
+ ui32 CalculateSerializedSize() const override { return 2 * sizeof(ui32); }
+
+ static void Out(IOutputStream& o, EReason x);
+
+ private:
+ static TString MakeData(ui32 sourceType, ui32 reason) {
+ TString s = TString::Uninitialized(sizeof(ui32) + sizeof(ui32));
+ char *p = s.Detach();
+ WriteUnaligned<ui32>(p + 0, sourceType);
+ WriteUnaligned<ui32>(p + 4, reason);
+ return s;
+ }
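+
+ // Data layout written above: [ui32 sourceType][ui32 reason] in host byte order,
+ // read back symmetrically by Load() in events_undelivered.cpp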
+ };
+
+ struct TEvCompleted: public TEventBase<TEvCompleted, TSystem::Completed> {
+ const ui32 Id;
+ const ui32 Status;
+ TEvCompleted(ui32 id = 0, ui32 status = 0)
+ : Id(id)
+ , Status(status)
+ {
+ }
+
+ DEFINE_SIMPLE_LOCAL_EVENT(TEvCompleted, "System: TEvCompleted")
+ };
+
+ struct TEvPoisonTaken: public TEventBase<TEvPoisonTaken, TSystem::PoisonTaken> {
+ DEFINE_SIMPLE_LOCAL_EVENT(TEvPoisonTaken, "System: TEvPoisonTaken")
+ };
+
+ struct TEvFlushLog: public TEventBase<TEvFlushLog, TSystem::FlushLog> {
+ DEFINE_SIMPLE_LOCAL_EVENT(TEvFlushLog, "System: TEvFlushLog")
+ };
+
+ struct TEvCallbackException: public TEventPB<TEvCallbackException,
+ NActorsProto::TCallbackException,
+ TSystem::CallbackException> {
+ TEvCallbackException(const TActorId& id, const TString& msg) {
+ ActorIdToProto(id, Record.MutableActorId());
+ Record.SetExceptionMessage(msg);
+ }
+ };
+
+ struct TEvCallbackCompletion: public TEventPB<TEvCallbackCompletion,
+ NActorsProto::TActorId,
+ TSystem::CallbackCompletion> {
+ TEvCallbackCompletion(const TActorId& id) {
+ ActorIdToProto(id, &Record);
+ }
+ };
+
+ struct TEvGone: public TEventBase<TEvGone, TSystem::Gone> {
+ DEFINE_SIMPLE_LOCAL_EVENT(TEvGone, "System: TEvGone")
+ };
+
+ struct TEvInvokeResult;
+
+ using TEvPoisonPill = TEvPoison; // Legacy name, deprecated
+ using TEvActorDied = TEvGone;
+ };
+}
+
+template <>
+inline void Out<NActors::TEvents::TEvUndelivered::EReason>(IOutputStream& o, NActors::TEvents::TEvUndelivered::EReason x) {
+ NActors::TEvents::TEvUndelivered::Out(o, x);
+}
diff --git a/library/cpp/actors/core/events_undelivered.cpp b/library/cpp/actors/core/events_undelivered.cpp
new file mode 100644
index 0000000000..23deaffd10
--- /dev/null
+++ b/library/cpp/actors/core/events_undelivered.cpp
@@ -0,0 +1,60 @@
+#include "events.h"
+#include "actorsystem.h"
+
+namespace NActors {
+ TString TEvents::TEvUndelivered::ToStringHeader() const {
+ return "TSystem::Undelivered";
+ }
+
+ bool TEvents::TEvUndelivered::SerializeToArcadiaStream(TChunkSerializer *serializer) const {
+ Y_VERIFY(!Unsure); // these are local-only events generated by Interconnect
+ return serializer->WriteString(&Data);
+ }
+
+ void TEvents::TEvUndelivered::Out(IOutputStream& o, EReason x) {
+ switch (x) {
+ case ReasonActorUnknown:
+ o << "ActorUnknown";
+ break;
+ case Disconnected:
+ o << "Disconnected";
+ break;
+ default:
+ o << "Undefined";
+ break;
+ }
+ }
+
+ bool TEvents::TEvUndelivered::IsSerializable() const {
+ return true;
+ }
+
+ IEventBase* TEvents::TEvUndelivered::Load(TEventSerializedData* bufs) {
+ TString str = bufs->GetString();
+ Y_VERIFY(str.size() == (sizeof(ui32) + sizeof(ui32)));
+ const char* p = str.data();
+ const ui32 sourceType = ReadUnaligned<ui32>(p + 0);
+ const ui32 reason = ReadUnaligned<ui32>(p + 4);
+ return new TEvUndelivered(sourceType, reason);
+ }
+
+ TAutoPtr<IEventHandle> IEventHandle::ForwardOnNondelivery(ui32 reason, bool unsure) {
+ if (Flags & FlagForwardOnNondelivery) {
+ const ui32 updatedFlags = Flags & ~(FlagForwardOnNondelivery | FlagSubscribeOnSession);
+ const TActorId recp = OnNondeliveryHolder ? OnNondeliveryHolder->Recipient : TActorId();
+
+ if (Event)
+ return new IEventHandle(recp, Sender, Event.Release(), updatedFlags, Cookie, &Recipient, TraceId.Clone());
+ else
+ return new IEventHandle(Type, updatedFlags, recp, Sender, Buffer, Cookie, &Recipient, TraceId.Clone());
+ }
+
+ if (Flags & FlagTrackDelivery) {
+ const ui32 updatedFlags = Flags & ~(FlagTrackDelivery | FlagSubscribeOnSession | FlagGenerateUnsureUndelivered);
+ return new IEventHandle(Sender, Recipient, new TEvents::TEvUndelivered(Type, reason, unsure), updatedFlags,
+ Cookie, nullptr, TraceId.Clone());
+ }
+
+ return nullptr;
+ }
+}
diff --git a/library/cpp/actors/core/executelater.h b/library/cpp/actors/core/executelater.h
new file mode 100644
index 0000000000..e7a13c1005
--- /dev/null
+++ b/library/cpp/actors/core/executelater.h
@@ -0,0 +1,87 @@
+#pragma once
+
+#include "actor_bootstrapped.h"
+
+#include <utility>
+
+namespace NActors {
+ template <typename TCallback>
+ class TExecuteLater: public TActorBootstrapped<TExecuteLater<TCallback>> {
+ public:
+ static constexpr IActor::EActivityType ActorActivityType() {
+ return IActor::ACTORLIB_COMMON;
+ }
+
+ TExecuteLater(
+ TCallback&& callback,
+ IActor::EActivityType activityType,
+ ui32 channel = 0,
+ ui64 cookie = 0,
+ const TActorId& reportCompletionTo = TActorId(),
+ const TActorId& reportExceptionTo = TActorId()) noexcept
+ : Callback(std::move(callback))
+ , Channel(channel)
+ , Cookie(cookie)
+ , ReportCompletionTo(reportCompletionTo)
+ , ReportExceptionTo(reportExceptionTo)
+ {
+ this->SetActivityType(activityType);
+ }
+
+ void Bootstrap(const TActorContext& ctx) noexcept {
+ try {
+ {
+ /* RAII: Callback must be destroyed right before
+ TEvCallbackCompletion is sent */
+
+ auto local = std::move(Callback);
+ using T = decltype(local);
+
+ if constexpr (std::is_invocable_v<T, const TActorContext&>) {
+ local(ctx);
+ } else {
+ local();
+ }
+ }
+
+ if (ReportCompletionTo) {
+ ctx.Send(ReportCompletionTo,
+ new TEvents::TEvCallbackCompletion(ctx.SelfID),
+ Channel, Cookie);
+ }
+ } catch (...) {
+ if (ReportExceptionTo) {
+ const TString msg = CurrentExceptionMessage();
+ ctx.Send(ReportExceptionTo,
+ new TEvents::TEvCallbackException(ctx.SelfID, msg),
+ Channel, Cookie);
+ }
+ }
+
+ this->Die(ctx);
+ }
+
+ private:
+ TCallback Callback;
+ const ui32 Channel;
+ const ui64 Cookie;
+ const TActorId ReportCompletionTo;
+ const TActorId ReportExceptionTo;
+ };
+
+ template <typename T>
+ IActor* CreateExecuteLaterActor(
+ T&& func,
+ IActor::EActivityType activityType,
+ ui32 channel = 0,
+ ui64 cookie = 0,
+ const TActorId& reportCompletionTo = TActorId(),
+ const TActorId& reportExceptionTo = TActorId()) noexcept {
+ return new TExecuteLater<T>(std::forward<T>(func),
+ activityType,
+ channel,
+ cookie,
+ reportCompletionTo,
+ reportExceptionTo);
+ }
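+
+ // Usage sketch (illustrative; the lambda body and registration site are assumptions):
+ //   ctx.Register(CreateExecuteLaterActor(
+ //       [] { /* do deferred work */ },
+ //       NActors::IActor::ACTORLIB_COMMON));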
+}
diff --git a/library/cpp/actors/core/executor_pool_base.cpp b/library/cpp/actors/core/executor_pool_base.cpp
new file mode 100644
index 0000000000..c3b9999168
--- /dev/null
+++ b/library/cpp/actors/core/executor_pool_base.cpp
@@ -0,0 +1,168 @@
+#include "executor_pool_base.h"
+#include "executor_thread.h"
+#include "mailbox.h"
+#include "probes.h"
+#include <library/cpp/actors/util/datetime.h>
+
+namespace NActors {
+ LWTRACE_USING(ACTORLIB_PROVIDER);
+
+ void DoActorInit(TActorSystem* sys, IActor* actor, const TActorId& self, const TActorId& owner) {
+ actor->SelfActorId = self;
+ actor->Registered(sys, owner);
+ }
+
+ TExecutorPoolBaseMailboxed::TExecutorPoolBaseMailboxed(ui32 poolId, ui32 maxActivityType)
+ : IExecutorPool(poolId)
+ , ActorSystem(nullptr)
+ , MailboxTable(new TMailboxTable)
+#ifdef ACTORSLIB_COLLECT_EXEC_STATS
+ , Stats(maxActivityType)
+#endif
+ {}
+
+ TExecutorPoolBaseMailboxed::~TExecutorPoolBaseMailboxed() {
+ MailboxTable.Destroy();
+ }
+
+ TExecutorPoolBase::TExecutorPoolBase(ui32 poolId, ui32 threads, TAffinity* affinity, ui32 maxActivityType)
+ : TExecutorPoolBaseMailboxed(poolId, maxActivityType)
+ , PoolThreads(threads)
+ , ThreadsAffinity(affinity)
+ {}
+
+ TExecutorPoolBase::~TExecutorPoolBase() {
+ while (Activations.Pop(0))
+ ;
+ }
+
+ void TExecutorPoolBaseMailboxed::ReclaimMailbox(TMailboxType::EType mailboxType, ui32 hint, TWorkerId workerId, ui64 revolvingWriteCounter) {
+ Y_UNUSED(workerId);
+ MailboxTable->ReclaimMailbox(mailboxType, hint, revolvingWriteCounter);
+ }
+
+ ui64 TExecutorPoolBaseMailboxed::AllocateID() {
+ return ActorSystem->AllocateIDSpace(1);
+ }
+
+ bool TExecutorPoolBaseMailboxed::Send(TAutoPtr<IEventHandle>& ev) {
+ Y_VERIFY_DEBUG(ev->GetRecipientRewrite().PoolID() == PoolId);
+#ifdef ACTORSLIB_COLLECT_EXEC_STATS
+ RelaxedStore(&ev->SendTime, (::NHPTimer::STime)GetCycleCountFast());
+#endif
+ return MailboxTable->SendTo(ev, this);
+ }
+
+ void TExecutorPoolBase::ScheduleActivation(ui32 activation) {
+ ScheduleActivationEx(activation, AtomicIncrement(ActivationsRevolvingCounter));
+ }
+
+ TActorId TExecutorPoolBaseMailboxed::Register(IActor* actor, TMailboxType::EType mailboxType, ui64 revolvingWriteCounter, const TActorId& parentId) {
+ NHPTimer::STime hpstart = GetCycleCountFast();
+#ifdef ACTORSLIB_COLLECT_EXEC_STATS
+ ui32 at = actor->GetActivityType();
+ if (at >= Stats.MaxActivityType())
+ at = 0;
+ AtomicIncrement(Stats.ActorsAliveByActivity[at]);
+#endif
+ AtomicIncrement(ActorRegistrations);
+
+ // first step - find good enough mailbox
+ ui32 hint = 0;
+ TMailboxHeader* mailbox = nullptr;
+
+ if (revolvingWriteCounter == 0)
+ revolvingWriteCounter = AtomicIncrement(RegisterRevolvingCounter);
+
+ {
+ ui32 hintBackoff = 0;
+
+ while (hint == 0) {
+ hint = MailboxTable->AllocateMailbox(mailboxType, ++revolvingWriteCounter);
+ mailbox = MailboxTable->Get(hint);
+
+ if (!mailbox->LockFromFree()) {
+ MailboxTable->ReclaimMailbox(mailboxType, hintBackoff, ++revolvingWriteCounter);
+ hintBackoff = hint;
+ hint = 0;
+ }
+ }
+
+ MailboxTable->ReclaimMailbox(mailboxType, hintBackoff, ++revolvingWriteCounter);
+ }
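+
+        // Note on the loop above: hintBackoff holds the mailbox that lost the
+        // LockFromFree() race; returning it to the free list is deferred until
+        // after the next allocation attempt (and once more after success), so
+        // the allocator does not immediately hand the same mailbox back and no
+        // allocated hint is leaked.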
+
+ const ui64 localActorId = AllocateID();
+
+ // ok, got mailbox
+ mailbox->AttachActor(localActorId, actor);
+
+ // do init
+ const TActorId actorId(ActorSystem->NodeId, PoolId, localActorId, hint);
+ DoActorInit(ActorSystem, actor, actorId, parentId);
+
+ // Once we unlock the mailbox the actor starts running and we cannot use the pointer any more
+ actor = nullptr;
+
+ switch (mailboxType) {
+ case TMailboxType::Simple:
+ UnlockFromExecution((TMailboxTable::TSimpleMailbox*)mailbox, this, false, hint, MaxWorkers, ++revolvingWriteCounter);
+ break;
+ case TMailboxType::Revolving:
+ UnlockFromExecution((TMailboxTable::TRevolvingMailbox*)mailbox, this, false, hint, MaxWorkers, ++revolvingWriteCounter);
+ break;
+ case TMailboxType::HTSwap:
+ UnlockFromExecution((TMailboxTable::THTSwapMailbox*)mailbox, this, false, hint, MaxWorkers, ++revolvingWriteCounter);
+ break;
+ case TMailboxType::ReadAsFilled:
+ UnlockFromExecution((TMailboxTable::TReadAsFilledMailbox*)mailbox, this, false, hint, MaxWorkers, ++revolvingWriteCounter);
+ break;
+ case TMailboxType::TinyReadAsFilled:
+ UnlockFromExecution((TMailboxTable::TTinyReadAsFilledMailbox*)mailbox, this, false, hint, MaxWorkers, ++revolvingWriteCounter);
+ break;
+ default:
+ Y_FAIL();
+ }
+
+ NHPTimer::STime elapsed = GetCycleCountFast() - hpstart;
+ if (elapsed > 1000000) {
+ LWPROBE(SlowRegisterNew, PoolId, NHPTimer::GetSeconds(elapsed) * 1000.0);
+ }
+
+ return actorId;
+ }
+
+ TActorId TExecutorPoolBaseMailboxed::Register(IActor* actor, TMailboxHeader* mailbox, ui32 hint, const TActorId& parentId) {
+ NHPTimer::STime hpstart = GetCycleCountFast();
+#ifdef ACTORSLIB_COLLECT_EXEC_STATS
+ ui32 at = actor->GetActivityType();
+ if (at >= Stats.MaxActivityType())
+ at = 0;
+ AtomicIncrement(Stats.ActorsAliveByActivity[at]);
+#endif
+ AtomicIncrement(ActorRegistrations);
+
+ const ui64 localActorId = AllocateID();
+ mailbox->AttachActor(localActorId, actor);
+
+ const TActorId actorId(ActorSystem->NodeId, PoolId, localActorId, hint);
+ DoActorInit(ActorSystem, actor, actorId, parentId);
+ NHPTimer::STime elapsed = GetCycleCountFast() - hpstart;
+ if (elapsed > 1000000) {
+ LWPROBE(SlowRegisterAdd, PoolId, NHPTimer::GetSeconds(elapsed) * 1000.0);
+ }
+
+ return actorId;
+ }
+
+ TAffinity* TExecutorPoolBase::Affinity() const {
+ return ThreadsAffinity.Get();
+ }
+
+ bool TExecutorPoolBaseMailboxed::Cleanup() {
+ return MailboxTable->Cleanup();
+ }
+
+ ui32 TExecutorPoolBase::GetThreads() const {
+ return PoolThreads;
+ }
+}
diff --git a/library/cpp/actors/core/executor_pool_base.h b/library/cpp/actors/core/executor_pool_base.h
new file mode 100644
index 0000000000..c84ce1af77
--- /dev/null
+++ b/library/cpp/actors/core/executor_pool_base.h
@@ -0,0 +1,49 @@
+#pragma once
+
+#include "actorsystem.h"
+#include "executor_thread.h"
+#include "scheduler_queue.h"
+#include <library/cpp/actors/util/affinity.h>
+#include <library/cpp/actors/util/unordered_cache.h>
+#include <library/cpp/actors/util/threadparkpad.h>
+
+namespace NActors {
+ class TExecutorPoolBaseMailboxed: public IExecutorPool {
+ protected:
+ TActorSystem* ActorSystem;
+ THolder<TMailboxTable> MailboxTable;
+#ifdef ACTORSLIB_COLLECT_EXEC_STATS
+ // Need to have per pool object to collect stats like actor registrations (because
+ // registrations might be done in threads from other pools)
+ TExecutorThreadStats Stats;
+#endif
+ TAtomic RegisterRevolvingCounter = 0;
+ ui64 AllocateID();
+ public:
+ TExecutorPoolBaseMailboxed(ui32 poolId, ui32 maxActivityType);
+ ~TExecutorPoolBaseMailboxed();
+ void ReclaimMailbox(TMailboxType::EType mailboxType, ui32 hint, TWorkerId workerId, ui64 revolvingWriteCounter) override;
+ bool Send(TAutoPtr<IEventHandle>& ev) override;
+ TActorId Register(IActor* actor, TMailboxType::EType mailboxType, ui64 revolvingWriteCounter, const TActorId& parentId) override;
+ TActorId Register(IActor* actor, TMailboxHeader* mailbox, ui32 hint, const TActorId& parentId) override;
+ bool Cleanup() override;
+ };
+
+ class TExecutorPoolBase: public TExecutorPoolBaseMailboxed {
+ protected:
+ const ui32 PoolThreads;
+ TIntrusivePtr<TAffinity> ThreadsAffinity;
+ TAtomic Semaphore = 0;
+ TUnorderedCache<ui32, 512, 4> Activations;
+ TAtomic ActivationsRevolvingCounter = 0;
+ volatile bool StopFlag = false;
+ public:
+ TExecutorPoolBase(ui32 poolId, ui32 threads, TAffinity* affinity, ui32 maxActivityType);
+ ~TExecutorPoolBase();
+ void ScheduleActivation(ui32 activation) override;
+ TAffinity* Affinity() const override;
+ ui32 GetThreads() const override;
+ };
+
+ void DoActorInit(TActorSystem*, IActor*, const TActorId&, const TActorId&);
+}
diff --git a/library/cpp/actors/core/executor_pool_basic.cpp b/library/cpp/actors/core/executor_pool_basic.cpp
new file mode 100644
index 0000000000..4dce16939a
--- /dev/null
+++ b/library/cpp/actors/core/executor_pool_basic.cpp
@@ -0,0 +1,431 @@
+#include "executor_pool_basic.h"
+#include "probes.h"
+#include "mailbox.h"
+#include <library/cpp/actors/util/affinity.h>
+#include <library/cpp/actors/util/datetime.h>
+
+#ifdef _linux_
+#include <pthread.h>
+#endif
+
+namespace NActors {
+ LWTRACE_USING(ACTORLIB_PROVIDER);
+
+ constexpr TDuration TBasicExecutorPool::DEFAULT_TIME_PER_MAILBOX;
+
+ TBasicExecutorPool::TBasicExecutorPool(
+ ui32 poolId,
+ ui32 threads,
+ ui64 spinThreshold,
+ const TString& poolName,
+ TAffinity* affinity,
+ TDuration timePerMailbox,
+ ui32 eventsPerMailbox,
+ int realtimePriority,
+ ui32 maxActivityType)
+ : TExecutorPoolBase(poolId, threads, affinity, maxActivityType)
+ , SpinThreshold(spinThreshold)
+ , SpinThresholdCycles(spinThreshold * NHPTimer::GetCyclesPerSecond() * 0.000001) // convert microseconds to cycles
+ , Threads(new TThreadCtx[threads])
+ , PoolName(poolName)
+ , TimePerMailbox(timePerMailbox)
+ , EventsPerMailbox(eventsPerMailbox)
+ , RealtimePriority(realtimePriority)
+ , ThreadUtilization(0)
+ , MaxUtilizationCounter(0)
+ , MaxUtilizationAccumulator(0)
+ , ThreadCount(threads)
+ {
+ }
+
+ TBasicExecutorPool::TBasicExecutorPool(const TBasicExecutorPoolConfig& cfg)
+ : TBasicExecutorPool(
+ cfg.PoolId,
+ cfg.Threads,
+ cfg.SpinThreshold,
+ cfg.PoolName,
+ new TAffinity(cfg.Affinity),
+ cfg.TimePerMailbox,
+ cfg.EventsPerMailbox,
+ cfg.RealtimePriority,
+ cfg.MaxActivityType
+ )
+ {}
+
+ TBasicExecutorPool::~TBasicExecutorPool() {
+ Threads.Destroy();
+ }
+
+ ui32 TBasicExecutorPool::GetReadyActivation(TWorkerContext& wctx, ui64 revolvingCounter) {
+ ui32 workerId = wctx.WorkerId;
+ Y_VERIFY_DEBUG(workerId < PoolThreads);
+
+ NHPTimer::STime elapsed = 0;
+ NHPTimer::STime parked = 0;
+ NHPTimer::STime blocked = 0;
+ NHPTimer::STime hpstart = GetCycleCountFast();
+ NHPTimer::STime hpnow;
+
+ TThreadCtx& threadCtx = Threads[workerId];
+ AtomicSet(threadCtx.WaitingFlag, TThreadCtx::WS_NONE);
+
+ if (Y_UNLIKELY(AtomicGet(threadCtx.BlockedFlag) != TThreadCtx::BS_NONE)) {
+ do {
+ if (AtomicCas(&threadCtx.BlockedFlag, TThreadCtx::BS_BLOCKED, TThreadCtx::BS_BLOCKING)) {
+ hpnow = GetCycleCountFast();
+ elapsed += hpnow - hpstart;
+ if (threadCtx.BlockedPad.Park()) // interrupted
+ return 0;
+ hpstart = GetCycleCountFast();
+ blocked += hpstart - hpnow;
+ }
+ } while (AtomicGet(threadCtx.BlockedFlag) != TThreadCtx::BS_NONE && !AtomicLoad(&StopFlag));
+ }
+
+ const TAtomic x = AtomicDecrement(Semaphore);
+
+ if (x < 0) {
+#if defined ACTORSLIB_COLLECT_EXEC_STATS
+ if (AtomicGetAndIncrement(ThreadUtilization) == 0) {
+ // Initially counter contains -t0, the pool start timestamp
+ // When the first thread goes to sleep we add t1, so the counter
+ // becomes t1-t0 >= 0, or the duration of max utilization so far.
+ // If the counter was negative and becomes positive, that means
+ // counter just turned into a duration and we should store that
+ // duration. Otherwise another thread raced with us and
+ // subtracted some other timestamp t2.
+ const i64 t = GetCycleCountFast();
+ const i64 x = AtomicGetAndAdd(MaxUtilizationCounter, t);
+ if (x < 0 && x + t > 0)
+ AtomicStore(&MaxUtilizationAccumulator, x + t);
+ }
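+            // Numeric sketch of the accounting above: a pool started at
+            // t0=100 cycles leaves the counter at -100; the first thread to
+            // go to sleep at t1=140 adds 140, turning the counter into 40,
+            // the length of the fully utilized span, which is then stored in
+            // MaxUtilizationAccumulator.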
+#endif
+
+ Y_VERIFY(AtomicLoad(&threadCtx.WaitingFlag) == TThreadCtx::WS_NONE);
+
+ if (SpinThreshold > 0) {
+ // spin configured period
+ AtomicSet(threadCtx.WaitingFlag, TThreadCtx::WS_ACTIVE);
+ ui64 start = GetCycleCountFast();
+ bool doSpin = true;
+ while (true) {
+ for (ui32 j = 0; doSpin && j < 12; ++j) {
+ if (GetCycleCountFast() >= (start + SpinThresholdCycles)) {
+ doSpin = false;
+ break;
+ }
+ for (ui32 i = 0; i < 12; ++i) {
+ if (AtomicLoad(&threadCtx.WaitingFlag) == TThreadCtx::WS_ACTIVE) {
+ SpinLockPause();
+ } else {
+ doSpin = false;
+ break;
+ }
+ }
+ }
+ if (!doSpin) {
+ break;
+ }
+ if (RelaxedLoad(&StopFlag)) {
+ break;
+ }
+ }
+ // then - sleep
+ if (AtomicLoad(&threadCtx.WaitingFlag) == TThreadCtx::WS_ACTIVE) {
+ if (AtomicCas(&threadCtx.WaitingFlag, TThreadCtx::WS_BLOCKED, TThreadCtx::WS_ACTIVE)) {
+ do {
+ hpnow = GetCycleCountFast();
+ elapsed += hpnow - hpstart;
+ if (threadCtx.Pad.Park()) // interrupted
+ return 0;
+ hpstart = GetCycleCountFast();
+ parked += hpstart - hpnow;
+ } while (AtomicLoad(&threadCtx.WaitingFlag) == TThreadCtx::WS_BLOCKED);
+ }
+ }
+ } else {
+ AtomicSet(threadCtx.WaitingFlag, TThreadCtx::WS_BLOCKED);
+ do {
+ hpnow = GetCycleCountFast();
+ elapsed += hpnow - hpstart;
+ if (threadCtx.Pad.Park()) // interrupted
+ return 0;
+ hpstart = GetCycleCountFast();
+ parked += hpstart - hpnow;
+ } while (AtomicLoad(&threadCtx.WaitingFlag) == TThreadCtx::WS_BLOCKED);
+ }
+
+ Y_VERIFY_DEBUG(AtomicLoad(&StopFlag) || AtomicLoad(&threadCtx.WaitingFlag) == TThreadCtx::WS_RUNNING);
+
+#if defined ACTORSLIB_COLLECT_EXEC_STATS
+ if (AtomicDecrement(ThreadUtilization) == 0) {
+ // When we started sleeping counter contained t1-t0, or the
+ // last duration of max utilization. Now we subtract t2 >= t1,
+ // which turns counter negative again, and the next sleep cycle
+ // at timestamp t3 would be adding some new duration t3-t2.
+ // If the counter was positive and becomes negative that means
+ // there are no current races with other threads and we should
+ // store the last positive duration we observed. Multiple
+ // threads may be adding and subtracting values in potentially
+ // arbitrary order, which would cause counter to oscillate
+                // around zero. The moment it crosses zero is a good
+                // indication of a correct value.
+ const i64 t = GetCycleCountFast();
+ const i64 x = AtomicGetAndAdd(MaxUtilizationCounter, -t);
+ if (x > 0 && x - t < 0)
+ AtomicStore(&MaxUtilizationAccumulator, x);
+ }
+#endif
+ } else {
+ AtomicSet(threadCtx.WaitingFlag, TThreadCtx::WS_RUNNING);
+ }
+
+        // ok, work has been signalled, must dequeue an activation
+ while (!RelaxedLoad(&StopFlag)) {
+ if (const ui32 activation = Activations.Pop(++revolvingCounter)) {
+ hpnow = GetCycleCountFast();
+ elapsed += hpnow - hpstart;
+ wctx.AddElapsedCycles(IActor::ACTOR_SYSTEM, elapsed);
+ if (parked > 0) {
+ wctx.AddParkedCycles(parked);
+ }
+ if (blocked > 0) {
+ wctx.AddBlockedCycles(blocked);
+ }
+ return activation;
+ }
+ SpinLockPause();
+ }
+
+ // stopping, die!
+ return 0;
+ }
+
+ inline void TBasicExecutorPool::WakeUpLoop() {
+ for (ui32 i = 0;;) {
+ TThreadCtx& threadCtx = Threads[i % PoolThreads];
+ switch (AtomicLoad(&threadCtx.WaitingFlag)) {
+ case TThreadCtx::WS_NONE:
+ case TThreadCtx::WS_RUNNING:
+ ++i;
+ break;
+ case TThreadCtx::WS_ACTIVE: // in active spin-lock, just set flag
+ if (AtomicCas(&threadCtx.WaitingFlag, TThreadCtx::WS_RUNNING, TThreadCtx::WS_ACTIVE)) {
+ return;
+ }
+ break;
+ case TThreadCtx::WS_BLOCKED:
+ if (AtomicCas(&threadCtx.WaitingFlag, TThreadCtx::WS_RUNNING, TThreadCtx::WS_BLOCKED)) {
+ threadCtx.Pad.Unpark();
+ return;
+ }
+ break;
+ default:
+ Y_FAIL();
+ }
+ }
+ }
+
+ void TBasicExecutorPool::ScheduleActivationEx(ui32 activation, ui64 revolvingCounter) {
+ Activations.Push(activation, revolvingCounter);
+ const TAtomic x = AtomicIncrement(Semaphore);
+ if (x <= 0) { // we must find someone to wake-up
+ WakeUpLoop();
+ }
+ }
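+
+    // The pairing of GetReadyActivation() and ScheduleActivationEx() above is
+    // a counting semaphore that may go negative:
+    //   Semaphore = (queued activations) - (threads parked or about to park).
+    // A minimal stand-alone sketch of the same handoff, assuming std::atomic
+    // in place of the library's TAtomic wrappers (illustrative only):
+    //
+    //   std::atomic<long> sem{0};
+    //   void Produce() {
+    //       queue.Push(item);
+    //       if (sem.fetch_add(1) < 0)  // old < 0, i.e. new <= 0: a waiter exists
+    //           WakeOneWorker();
+    //   }
+    //   void Consume() {
+    //       if (sem.fetch_sub(1) <= 0) // old <= 0, i.e. new < 0: nothing queued yet
+    //           ParkUntilWoken();
+    //       item = queue.Pop();
+    //   }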
+
+ void TBasicExecutorPool::GetCurrentStats(TExecutorPoolStats& poolStats, TVector<TExecutorThreadStats>& statsCopy) const {
+ poolStats.MaxUtilizationTime = RelaxedLoad(&MaxUtilizationAccumulator) / (i64)(NHPTimer::GetCyclesPerSecond() / 1000);
+
+ statsCopy.resize(PoolThreads + 1);
+ // Save counters from the pool object
+ statsCopy[0] = TExecutorThreadStats();
+ statsCopy[0].Aggregate(Stats);
+ // Per-thread stats
+ for (size_t i = 0; i < PoolThreads; ++i) {
+ Threads[i].Thread->GetCurrentStats(statsCopy[i + 1]);
+ }
+ }
+
+ void TBasicExecutorPool::Prepare(TActorSystem* actorSystem, NSchedulerQueue::TReader** scheduleReaders, ui32* scheduleSz) {
+ TAffinityGuard affinityGuard(Affinity());
+
+ ActorSystem = actorSystem;
+
+ ScheduleReaders.Reset(new NSchedulerQueue::TReader[PoolThreads]);
+ ScheduleWriters.Reset(new NSchedulerQueue::TWriter[PoolThreads]);
+
+ for (ui32 i = 0; i != PoolThreads; ++i) {
+ Threads[i].Thread.Reset(
+ new TExecutorThread(
+ i,
+ 0, // CpuId is not used in BASIC pool
+ actorSystem,
+ this,
+ MailboxTable.Get(),
+ PoolName,
+ TimePerMailbox,
+ EventsPerMailbox));
+ ScheduleWriters[i].Init(ScheduleReaders[i]);
+ }
+
+ *scheduleReaders = ScheduleReaders.Get();
+ *scheduleSz = PoolThreads;
+ }
+
+ void TBasicExecutorPool::Start() {
+ TAffinityGuard affinityGuard(Affinity());
+
+ ThreadUtilization = 0;
+ AtomicAdd(MaxUtilizationCounter, -(i64)GetCycleCountFast());
+
+ for (ui32 i = 0; i != PoolThreads; ++i) {
+ Threads[i].Thread->Start();
+ }
+ }
+
+ void TBasicExecutorPool::PrepareStop() {
+ AtomicStore(&StopFlag, true);
+ for (ui32 i = 0; i != PoolThreads; ++i) {
+ Threads[i].Pad.Interrupt();
+ Threads[i].BlockedPad.Interrupt();
+ }
+ }
+
+ void TBasicExecutorPool::Shutdown() {
+ for (ui32 i = 0; i != PoolThreads; ++i)
+ Threads[i].Thread->Join();
+ }
+
+ void TBasicExecutorPool::Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) {
+ Y_VERIFY_DEBUG(workerId < PoolThreads);
+
+ Schedule(deadline - ActorSystem->Timestamp(), ev, cookie, workerId);
+ }
+
+ void TBasicExecutorPool::Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) {
+ Y_VERIFY_DEBUG(workerId < PoolThreads);
+
+ const auto current = ActorSystem->Monotonic();
+ if (deadline < current)
+ deadline = current;
+
+ ScheduleWriters[workerId].Push(deadline.MicroSeconds(), ev.Release(), cookie);
+ }
+
+ void TBasicExecutorPool::Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) {
+ Y_VERIFY_DEBUG(workerId < PoolThreads);
+
+ const auto deadline = ActorSystem->Monotonic() + delta;
+ ScheduleWriters[workerId].Push(deadline.MicroSeconds(), ev.Release(), cookie);
+ }
+
+ void TBasicExecutorPool::SetRealTimeMode() const {
+// TODO: musl-libc version of `sched_param` struct is for some reason different from pthread
+// version in Ubuntu 12.04
+#if defined(_linux_) && !defined(_musl_)
+ if (RealtimePriority != 0) {
+ pthread_t threadSelf = pthread_self();
+ sched_param param = {RealtimePriority};
+ if (pthread_setschedparam(threadSelf, SCHED_FIFO, &param)) {
+ Y_FAIL("Cannot set realtime priority");
+ }
+ }
+#else
+ Y_UNUSED(RealtimePriority);
+#endif
+ }
+
+ ui32 TBasicExecutorPool::GetThreadCount() const {
+ return AtomicGet(ThreadCount);
+ }
+
+ void TBasicExecutorPool::SetThreadCount(ui32 threads) {
+ threads = Max(1u, Min(PoolThreads, threads));
+ with_lock (ChangeThreadsLock) {
+ size_t prevCount = GetThreadCount();
+ AtomicSet(ThreadCount, threads);
+ if (prevCount < threads) {
+ for (size_t i = prevCount; i < threads; ++i) {
+ bool repeat = true;
+ while (repeat) {
+ switch (AtomicGet(Threads[i].BlockedFlag)) {
+ case TThreadCtx::BS_BLOCKING:
+ if (AtomicCas(&Threads[i].BlockedFlag, TThreadCtx::BS_NONE, TThreadCtx::BS_BLOCKING)) {
+                                    // the thread has not yet entered the blocked loop; blocking is cancelled
+ repeat = false;
+ }
+ break;
+ case TThreadCtx::BS_BLOCKED:
+                                // the thread has entered the blocked loop, so wake it up
+ AtomicSet(Threads[i].BlockedFlag, TThreadCtx::BS_NONE);
+ Threads[i].BlockedPad.Unpark();
+ repeat = false;
+ break;
+ default:
+                                // the flag must not be TThreadCtx::BS_NONE here because the thread was previously told to block
+ Y_FAIL("BlockedFlag is not TThreadCtx::BS_BLOCKING and TThreadCtx::BS_BLOCKED when thread was waked up");
+ }
+ }
+ }
+ } else if (prevCount > threads) {
+ // at first, start to block
+ for (size_t i = threads; i < prevCount; ++i) {
+ Y_VERIFY(AtomicGet(Threads[i].BlockedFlag) == TThreadCtx::BS_NONE);
+ AtomicSet(Threads[i].BlockedFlag, TThreadCtx::BS_BLOCKING);
+ }
+ // after check need to wake up threads
+ for (size_t idx = threads; idx < prevCount; ++idx) {
+ TThreadCtx& threadCtx = Threads[idx];
+ auto waitingFlag = AtomicGet(threadCtx.WaitingFlag);
+ auto blockedFlag = AtomicGet(threadCtx.BlockedFlag);
+                    // While the thread is in this pair of states (WS_NONE and BS_BLOCKING) we cannot tell which way it will go:
+                    // either it goes to sleep and will have to be woken up,
+                    // or it goes to execute a task and will block after completing it.
+ while (waitingFlag == TThreadCtx::WS_NONE && blockedFlag == TThreadCtx::BS_BLOCKING) {
+ waitingFlag = AtomicGet(threadCtx.WaitingFlag);
+ blockedFlag = AtomicGet(threadCtx.BlockedFlag);
+ }
+                    // possible next states:
+                    // 1) WS_ACTIVE  BS_BLOCKING - waiting, started spinning  | wake-up needed to block
+                    // 2) WS_BLOCKED BS_BLOCKING - waiting, started sleeping  | wake-up needed to block
+                    // 3) WS_RUNNING BS_BLOCKING - started executing          | no wake-up needed, will block after executing
+                    // 4) WS_NONE    BS_BLOCKED  - already blocked            | no wake-up needed
+
+ if (waitingFlag == TThreadCtx::WS_ACTIVE || waitingFlag == TThreadCtx::WS_BLOCKED) {
+                        // a wake-up is needed
+ Y_VERIFY(blockedFlag == TThreadCtx::BS_BLOCKING);
+
+                        // create an empty mailbox hint (the lowest set bit of LineIndexMask, i.e. LineIndex == 1 and LineHint == 0); such activations are ignored
+ constexpr auto emptyMailBoxHint = TMailboxTable::LineIndexMask & -TMailboxTable::LineIndexMask;
+ ui64 revolvingCounter = AtomicGet(ActivationsRevolvingCounter);
+
+ Activations.Push(emptyMailBoxHint, revolvingCounter);
+
+ auto x = AtomicIncrement(Semaphore);
+ if (x <= 0) {
+                            // try to wake it up; on success move on to the next thread
+ switch (waitingFlag){
+ case TThreadCtx::WS_ACTIVE: // in active spin-lock, just set flag
+ if (AtomicCas(&threadCtx.WaitingFlag, TThreadCtx::WS_RUNNING, TThreadCtx::WS_ACTIVE)) {
+ continue;
+ }
+ break;
+ case TThreadCtx::WS_BLOCKED:
+ if (AtomicCas(&threadCtx.WaitingFlag, TThreadCtx::WS_RUNNING, TThreadCtx::WS_BLOCKED)) {
+ threadCtx.Pad.Unpark();
+ continue;
+ }
+ break;
+ default:
+                                    ; // another thread has already woken this sleeping thread
+ }
+                            // if the thread has already been woken by someone else, wake up another one instead
+ WakeUpLoop();
+ }
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/library/cpp/actors/core/executor_pool_basic.h b/library/cpp/actors/core/executor_pool_basic.h
new file mode 100644
index 0000000000..023190f7fe
--- /dev/null
+++ b/library/cpp/actors/core/executor_pool_basic.h
@@ -0,0 +1,111 @@
+#pragma once
+
+#include "actorsystem.h"
+#include "executor_thread.h"
+#include "scheduler_queue.h"
+#include "executor_pool_base.h"
+#include <library/cpp/actors/util/unordered_cache.h>
+#include <library/cpp/actors/util/threadparkpad.h>
+#include <library/cpp/monlib/dynamic_counters/counters.h>
+
+#include <util/system/mutex.h>
+
+namespace NActors {
+ class TBasicExecutorPool: public TExecutorPoolBase {
+ struct TThreadCtx {
+ TAutoPtr<TExecutorThread> Thread;
+ TThreadParkPad Pad;
+ TThreadParkPad BlockedPad;
+ TAtomic WaitingFlag;
+ TAtomic BlockedFlag;
+
+ // different threads must spin/block on different cache-lines.
+ // we add some padding bytes to enforce this rule
+ static const size_t SizeWithoutPadding = sizeof(TAutoPtr<TExecutorThread>) + 2 * sizeof(TThreadParkPad) + 2 * sizeof(TAtomic);
+ ui8 Padding[64 - SizeWithoutPadding];
+ static_assert(64 >= SizeWithoutPadding);
+
+ enum EWaitState {
+ WS_NONE,
+ WS_ACTIVE,
+ WS_BLOCKED,
+ WS_RUNNING
+ };
+
+ enum EBlockedState {
+ BS_NONE,
+ BS_BLOCKING,
+ BS_BLOCKED
+ };
+
+ TThreadCtx()
+ : WaitingFlag(WS_NONE)
+ , BlockedFlag(BS_NONE)
+ {
+ }
+ };
+
+ const ui64 SpinThreshold;
+ const ui64 SpinThresholdCycles;
+
+ TArrayHolder<TThreadCtx> Threads;
+
+ TArrayHolder<NSchedulerQueue::TReader> ScheduleReaders;
+ TArrayHolder<NSchedulerQueue::TWriter> ScheduleWriters;
+
+ const TString PoolName;
+ const TDuration TimePerMailbox;
+ const ui32 EventsPerMailbox;
+
+ const int RealtimePriority;
+
+ TAtomic ThreadUtilization;
+ TAtomic MaxUtilizationCounter;
+ TAtomic MaxUtilizationAccumulator;
+
+ TAtomic ThreadCount;
+ TMutex ChangeThreadsLock;
+
+ public:
+ static constexpr TDuration DEFAULT_TIME_PER_MAILBOX = TBasicExecutorPoolConfig::DEFAULT_TIME_PER_MAILBOX;
+ static constexpr ui32 DEFAULT_EVENTS_PER_MAILBOX = TBasicExecutorPoolConfig::DEFAULT_EVENTS_PER_MAILBOX;
+
+ TBasicExecutorPool(ui32 poolId,
+ ui32 threads,
+ ui64 spinThreshold,
+ const TString& poolName = "",
+ TAffinity* affinity = nullptr,
+ TDuration timePerMailbox = DEFAULT_TIME_PER_MAILBOX,
+ ui32 eventsPerMailbox = DEFAULT_EVENTS_PER_MAILBOX,
+ int realtimePriority = 0,
+ ui32 maxActivityType = 1);
+ explicit TBasicExecutorPool(const TBasicExecutorPoolConfig& cfg);
+ ~TBasicExecutorPool();
+
+ ui32 GetReadyActivation(TWorkerContext& wctx, ui64 revolvingReadCounter) override;
+
+ void Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) override;
+ void Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) override;
+ void Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) override;
+
+ void ScheduleActivationEx(ui32 activation, ui64 revolvingWriteCounter) override;
+
+ void Prepare(TActorSystem* actorSystem, NSchedulerQueue::TReader** scheduleReaders, ui32* scheduleSz) override;
+ void Start() override;
+ void PrepareStop() override;
+ void Shutdown() override;
+
+ void GetCurrentStats(TExecutorPoolStats& poolStats, TVector<TExecutorThreadStats>& statsCopy) const override;
+ TString GetName() const override {
+ return PoolName;
+ }
+
+ void SetRealTimeMode() const override;
+
+ ui32 GetThreadCount() const;
+ void SetThreadCount(ui32 threads);
+
+ private:
+ void WakeUpLoop();
+ };
+}
diff --git a/library/cpp/actors/core/executor_pool_basic_ut.cpp b/library/cpp/actors/core/executor_pool_basic_ut.cpp
new file mode 100644
index 0000000000..76dff693af
--- /dev/null
+++ b/library/cpp/actors/core/executor_pool_basic_ut.cpp
@@ -0,0 +1,435 @@
+#include "actorsystem.h"
+#include "executor_pool_basic.h"
+#include "hfunc.h"
+#include "scheduler_basic.h"
+
+#include <library/cpp/actors/util/should_continue.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+#include <library/cpp/actors/protos/unittests.pb.h>
+
+using namespace NActors;
+
+////////////////////////////////////////////////////////////////////////////////
+
+struct TEvMsg : public NActors::TEventBase<TEvMsg, 10347> {
+ DEFINE_SIMPLE_LOCAL_EVENT(TEvMsg, "ExecutorPoolTest: Msg");
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TTestSenderActor : public IActor {
+private:
+    using EActivityType = IActor::EActivityType;
+ using EActorActivity = IActor::EActorActivity;
+
+private:
+ TAtomic Counter;
+ TActorId Receiver;
+
+ std::function<void(void)> Action;
+
+public:
+ TTestSenderActor(std::function<void(void)> action = [](){},
+ EActivityType activityType = EActorActivity::OTHER)
+ : IActor(static_cast<TReceiveFunc>(&TTestSenderActor::Execute), activityType)
+ , Action(action)
+ {}
+
+ void Start(TActorId receiver, size_t count)
+ {
+ AtomicSet(Counter, count);
+ Receiver = receiver;
+ }
+
+ void Stop() {
+ while (true) {
+ if (GetCounter() == 0) {
+ break;
+ }
+
+ Sleep(TDuration::MilliSeconds(1));
+ }
+ }
+
+ size_t GetCounter() const {
+ return AtomicGet(Counter);
+ }
+
+private:
+ STFUNC(Execute)
+ {
+ Y_UNUSED(ctx);
+ switch (ev->GetTypeRewrite()) {
+ hFunc(TEvMsg, Handle);
+ }
+ }
+
+ void Handle(TEvMsg::TPtr &ev)
+ {
+ Y_UNUSED(ev);
+ Action();
+ TAtomicBase count = AtomicDecrement(Counter);
+ Y_VERIFY(count != Max<TAtomicBase>());
+ if (count) {
+ Send(Receiver, new TEvMsg());
+ }
+ }
+};
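+
+// The helper above implements a self-ping loop: Start() arms the counter, each
+// handled TEvMsg runs Action, decrements the counter and re-sends to Receiver
+// until it reaches zero, and Stop() spin-waits for completion. This is enough
+// to keep a pool saturated for the throughput assertions below.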
+
+THolder<TActorSystemSetup> GetActorSystemSetup(TBasicExecutorPool* pool)
+{
+ auto setup = MakeHolder<NActors::TActorSystemSetup>();
+ setup->NodeId = 1;
+ setup->ExecutorsCount = 1;
+ setup->Executors.Reset(new TAutoPtr<NActors::IExecutorPool>[1]);
+ setup->Executors[0] = pool;
+ setup->Scheduler = new TBasicSchedulerThread(NActors::TSchedulerConfig(512, 0));
+ return setup;
+}
+
+Y_UNIT_TEST_SUITE(BasicExecutorPool) {
+
+ Y_UNIT_TEST(DecreaseIncreaseThreadsCount) {
+ const size_t msgCount = 1e4;
+ const size_t size = 4;
+ const size_t halfSize = size / 2;
+ TBasicExecutorPool* executorPool = new TBasicExecutorPool(0, size, 50);
+
+ auto setup = GetActorSystemSetup(executorPool);
+ TActorSystem actorSystem(setup);
+ actorSystem.Start();
+
+ executorPool->SetThreadCount(halfSize);
+ TTestSenderActor* actors[size];
+ TActorId actorIds[size];
+ for (size_t i = 0; i < size; ++i) {
+ actors[i] = new TTestSenderActor();
+ actorIds[i] = actorSystem.Register(actors[i]);
+ }
+
+ const int testCount = 2;
+
+ TExecutorPoolStats poolStats[testCount];
+ TVector<TExecutorThreadStats> statsCopy[testCount];
+
+ for (size_t testIdx = 0; testIdx < testCount; ++testIdx) {
+ for (size_t i = 0; i < size; ++i) {
+ actors[i]->Start(actors[i]->SelfId(), msgCount);
+ }
+ for (size_t i = 0; i < size; ++i) {
+ actorSystem.Send(actorIds[i], new TEvMsg());
+ }
+
+ Sleep(TDuration::MilliSeconds(100));
+
+ for (size_t i = 0; i < size; ++i) {
+ actors[i]->Stop();
+ }
+
+ executorPool->GetCurrentStats(poolStats[testIdx], statsCopy[testIdx]);
+ }
+
+ for (size_t i = 1; i <= halfSize; ++i) {
+ UNIT_ASSERT_UNEQUAL(statsCopy[0][i].ReceivedEvents, statsCopy[1][i].ReceivedEvents);
+ }
+
+ for (size_t i = halfSize + 1; i <= size; ++i) {
+ UNIT_ASSERT_EQUAL(statsCopy[0][i].ReceivedEvents, statsCopy[1][i].ReceivedEvents);
+ }
+
+ executorPool->SetThreadCount(size);
+
+ for (size_t testIdx = 0; testIdx < testCount; ++testIdx) {
+ for (size_t i = 0; i < size; ++i) {
+ actors[i]->Start(actors[i]->SelfId(), msgCount);
+ }
+ for (size_t i = 0; i < size; ++i) {
+ actorSystem.Send(actorIds[i], new TEvMsg());
+ }
+
+ Sleep(TDuration::MilliSeconds(100));
+
+ for (size_t i = 0; i < size; ++i) {
+ actors[i]->Stop();
+ }
+
+ executorPool->GetCurrentStats(poolStats[testIdx], statsCopy[testIdx]);
+ }
+
+ for (size_t i = 1; i <= size; ++i) {
+ UNIT_ASSERT_UNEQUAL(statsCopy[0][i].ReceivedEvents, statsCopy[1][i].ReceivedEvents);
+ }
+ }
+
+ Y_UNIT_TEST(ChangeCount) {
+ const size_t msgCount = 1e3;
+ const size_t size = 4;
+ const size_t halfSize = size / 2;
+ TBasicExecutorPool* executorPool = new TBasicExecutorPool(0, size, 50);
+
+ auto begin = TInstant::Now();
+
+ auto setup = GetActorSystemSetup(executorPool);
+ TActorSystem actorSystem(setup);
+ actorSystem.Start();
+ executorPool->SetThreadCount(halfSize);
+
+ TTestSenderActor* actors[size];
+ TActorId actorIds[size];
+ for (size_t i = 0; i < size; ++i) {
+ actors[i] = new TTestSenderActor();
+ actorIds[i] = actorSystem.Register(actors[i]);
+ }
+
+ for (size_t i = 0; i < size; ++i) {
+ actors[i]->Start(actorIds[i], msgCount);
+ }
+ for (size_t i = 0; i < size; ++i) {
+ actorSystem.Send(actorIds[i], new TEvMsg());
+ }
+
+ const i32 N = 6;
+        const i32 threadsCounts[N] = { 1, 3, 2, 3, 1, 4 };
+
+ ui64 counter = 0;
+
+ TTestSenderActor* changerActor = new TTestSenderActor([&]{
+            executorPool->SetThreadCount(threadsCounts[counter]);
+ counter++;
+ if (counter == N) {
+ counter = 0;
+ }
+ });
+ TActorId changerActorId = actorSystem.Register(changerActor);
+ changerActor->Start(changerActorId, msgCount);
+ actorSystem.Send(changerActorId, new TEvMsg());
+
+ while (true) {
+ size_t maxCounter = 0;
+ for (size_t i = 0; i < size; ++i) {
+ maxCounter = Max(maxCounter, actors[i]->GetCounter());
+ }
+
+ if (maxCounter == 0) {
+ break;
+ }
+
+ auto now = TInstant::Now();
+ UNIT_ASSERT_C(now - begin < TDuration::Seconds(5), "Max counter is " << maxCounter);
+
+ Sleep(TDuration::MilliSeconds(1));
+ }
+
+ changerActor->Stop();
+ }
+
+ Y_UNIT_TEST(CheckCompleteOne) {
+ const size_t size = 4;
+ const size_t msgCount = 1e4;
+ TBasicExecutorPool* executorPool = new TBasicExecutorPool(0, size, 50);
+
+ auto setup = GetActorSystemSetup(executorPool);
+ TActorSystem actorSystem(setup);
+ actorSystem.Start();
+
+ auto begin = TInstant::Now();
+
+ auto actor = new TTestSenderActor();
+ auto actorId = actorSystem.Register(actor);
+ actor->Start(actor->SelfId(), msgCount);
+ actorSystem.Send(actorId, new TEvMsg());
+
+ while (actor->GetCounter()) {
+ auto now = TInstant::Now();
+ UNIT_ASSERT_C(now - begin < TDuration::Seconds(5), "Counter is " << actor->GetCounter());
+
+ Sleep(TDuration::MilliSeconds(1));
+ }
+ }
+
+ Y_UNIT_TEST(CheckCompleteAll) {
+ const size_t size = 4;
+ const size_t msgCount = 1e4;
+ TBasicExecutorPool* executorPool = new TBasicExecutorPool(0, size, 50);
+
+ auto setup = GetActorSystemSetup(executorPool);
+ TActorSystem actorSystem(setup);
+ actorSystem.Start();
+
+ auto begin = TInstant::Now();
+
+ TTestSenderActor* actors[size];
+ TActorId actorIds[size];
+
+ for (size_t i = 0; i < size; ++i) {
+ actors[i] = new TTestSenderActor();
+ actorIds[i] = actorSystem.Register(actors[i]);
+ }
+ for (size_t i = 0; i < size; ++i) {
+ actors[i]->Start(actors[i]->SelfId(), msgCount);
+ }
+ for (size_t i = 0; i < size; ++i) {
+ actorSystem.Send(actorIds[i], new TEvMsg());
+ }
+
+
+ while (true) {
+ size_t maxCounter = 0;
+ for (size_t i = 0; i < size; ++i) {
+ maxCounter = Max(maxCounter, actors[i]->GetCounter());
+ }
+
+ if (maxCounter == 0) {
+ break;
+ }
+
+ auto now = TInstant::Now();
+ UNIT_ASSERT_C(now - begin < TDuration::Seconds(5), "Max counter is " << maxCounter);
+
+ Sleep(TDuration::MilliSeconds(1));
+ }
+ }
+
+ Y_UNIT_TEST(CheckCompleteOver) {
+ const size_t size = 4;
+ const size_t actorsCount = size * 2;
+ const size_t msgCount = 1e4;
+ TBasicExecutorPool* executorPool = new TBasicExecutorPool(0, size, 50);
+
+ auto setup = GetActorSystemSetup(executorPool);
+ TActorSystem actorSystem(setup);
+ actorSystem.Start();
+
+ auto begin = TInstant::Now();
+
+ TTestSenderActor* actors[actorsCount];
+ TActorId actorIds[actorsCount];
+
+ for (size_t i = 0; i < actorsCount; ++i) {
+ actors[i] = new TTestSenderActor();
+ actorIds[i] = actorSystem.Register(actors[i]);
+ }
+ for (size_t i = 0; i < actorsCount; ++i) {
+ actors[i]->Start(actors[i]->SelfId(), msgCount);
+ }
+ for (size_t i = 0; i < actorsCount; ++i) {
+ actorSystem.Send(actorIds[i], new TEvMsg());
+ }
+
+
+ while (true) {
+ size_t maxCounter = 0;
+ for (size_t i = 0; i < actorsCount; ++i) {
+ maxCounter = Max(maxCounter, actors[i]->GetCounter());
+ }
+
+ if (maxCounter == 0) {
+ break;
+ }
+
+ auto now = TInstant::Now();
+ UNIT_ASSERT_C(now - begin < TDuration::Seconds(5), "Max counter is " << maxCounter);
+
+ Sleep(TDuration::MilliSeconds(1));
+ }
+ }
+
+ Y_UNIT_TEST(CheckCompleteRoundRobinOver) {
+ const size_t size = 4;
+ const size_t actorsCount = size * 2;
+ const size_t msgCount = 1e2;
+ TBasicExecutorPool* executorPool = new TBasicExecutorPool(0, size, 50);
+
+ auto setup = GetActorSystemSetup(executorPool);
+ TActorSystem actorSystem(setup);
+ actorSystem.Start();
+
+ auto begin = TInstant::Now();
+
+ TTestSenderActor* actors[actorsCount];
+ TActorId actorIds[actorsCount];
+
+ for (size_t i = 0; i < actorsCount; ++i) {
+ actors[i] = new TTestSenderActor();
+ actorIds[i] = actorSystem.Register(actors[i]);
+ }
+ for (size_t i = 0; i < actorsCount; ++i) {
+ actors[i]->Start(actorIds[(i + 1) % actorsCount], msgCount);
+ }
+ for (size_t i = 0; i < actorsCount; ++i) {
+ actorSystem.Send(actorIds[i], new TEvMsg());
+ }
+
+ while (true) {
+ size_t maxCounter = 0;
+ for (size_t i = 0; i < actorsCount; ++i) {
+ maxCounter = Max(maxCounter, actors[i]->GetCounter());
+ }
+
+ if (maxCounter == 0) {
+ break;
+ }
+
+ auto now = TInstant::Now();
+ UNIT_ASSERT_C(now - begin < TDuration::Seconds(5), "Max counter is " << maxCounter);
+
+ Sleep(TDuration::MilliSeconds(1));
+ }
+ }
+
+ Y_UNIT_TEST(CheckStats) {
+ const size_t size = 4;
+ const size_t msgCount = 1e4;
+ TBasicExecutorPool* executorPool = new TBasicExecutorPool(0, size, 50);
+
+ auto setup = GetActorSystemSetup(executorPool);
+ TActorSystem actorSystem(setup);
+ actorSystem.Start();
+
+ auto begin = TInstant::Now();
+
+ auto actor = new TTestSenderActor();
+ auto actorId = actorSystem.Register(actor);
+ actor->Start(actor->SelfId(), msgCount);
+ actorSystem.Send(actorId, new TEvMsg());
+
+ while (actor->GetCounter()) {
+ auto now = TInstant::Now();
+ UNIT_ASSERT_C(now - begin < TDuration::Seconds(5), "Counter is " << actor->GetCounter());
+
+ Sleep(TDuration::MilliSeconds(1));
+ }
+
+ TVector<TExecutorThreadStats> stats;
+ TExecutorPoolStats poolStats;
+ actorSystem.GetPoolStats(0, poolStats, stats);
+ // Sum all per-thread counters into the 0th element
+ for (ui32 idx = 1; idx < stats.size(); ++idx) {
+ stats[0].Aggregate(stats[idx]);
+ }
+
+ UNIT_ASSERT_VALUES_EQUAL(stats[0].SentEvents, msgCount - 1);
+ UNIT_ASSERT_VALUES_EQUAL(stats[0].ReceivedEvents, msgCount);
+ UNIT_ASSERT_VALUES_EQUAL(stats[0].PreemptedEvents, 0);
+ UNIT_ASSERT_VALUES_EQUAL(stats[0].NonDeliveredEvents, 0);
+ UNIT_ASSERT_VALUES_EQUAL(stats[0].EmptyMailboxActivation, 0);
+ //UNIT_ASSERT_VALUES_EQUAL(stats[0].CpuNs, 0); // depends on total duration of test, so undefined
+ UNIT_ASSERT(stats[0].ElapsedTicks > 0);
+ UNIT_ASSERT(stats[0].ParkedTicks > 0);
+ UNIT_ASSERT_VALUES_EQUAL(stats[0].BlockedTicks, 0);
+ UNIT_ASSERT(stats[0].ActivationTimeHistogram.TotalSamples >= msgCount / TBasicExecutorPoolConfig::DEFAULT_EVENTS_PER_MAILBOX);
+ UNIT_ASSERT_VALUES_EQUAL(stats[0].EventDeliveryTimeHistogram.TotalSamples, msgCount);
+ UNIT_ASSERT_VALUES_EQUAL(stats[0].EventProcessingCountHistogram.TotalSamples, msgCount);
+ UNIT_ASSERT(stats[0].EventProcessingTimeHistogram.TotalSamples > 0);
+ UNIT_ASSERT(stats[0].ElapsedTicksByActivity[0] > 0);
+ UNIT_ASSERT_VALUES_EQUAL(stats[0].ReceivedEventsByActivity[0], msgCount);
+ UNIT_ASSERT_VALUES_EQUAL(stats[0].ActorsAliveByActivity[0], 1);
+ UNIT_ASSERT_VALUES_EQUAL(stats[0].ScheduledEventsByActivity[0], 0);
+ UNIT_ASSERT_VALUES_EQUAL(stats[0].PoolActorRegistrations, 1);
+ UNIT_ASSERT_VALUES_EQUAL(stats[0].PoolDestroyedActors, 0);
+ UNIT_ASSERT_VALUES_EQUAL(stats[0].PoolAllocatedMailboxes, 4095); // one line
+ UNIT_ASSERT(stats[0].MailboxPushedOutByTime + stats[0].MailboxPushedOutByEventCount >= msgCount / TBasicExecutorPoolConfig::DEFAULT_EVENTS_PER_MAILBOX);
+ UNIT_ASSERT_VALUES_EQUAL(stats[0].MailboxPushedOutBySoftPreemption, 0);
+ }
+}
diff --git a/library/cpp/actors/core/executor_pool_io.cpp b/library/cpp/actors/core/executor_pool_io.cpp
new file mode 100644
index 0000000000..fb557ae6b0
--- /dev/null
+++ b/library/cpp/actors/core/executor_pool_io.cpp
@@ -0,0 +1,151 @@
+#include "executor_pool_io.h"
+#include "mailbox.h"
+#include <library/cpp/actors/util/affinity.h>
+#include <library/cpp/actors/util/datetime.h>
+
+namespace NActors {
+ TIOExecutorPool::TIOExecutorPool(ui32 poolId, ui32 threads, const TString& poolName, TAffinity* affinity, ui32 maxActivityType)
+ : TExecutorPoolBase(poolId, threads, affinity, maxActivityType)
+ , Threads(new TThreadCtx[threads])
+ , PoolName(poolName)
+ {}
+
+ TIOExecutorPool::TIOExecutorPool(const TIOExecutorPoolConfig& cfg)
+ : TIOExecutorPool(
+ cfg.PoolId,
+ cfg.Threads,
+ cfg.PoolName,
+ new TAffinity(cfg.Affinity),
+ cfg.MaxActivityType
+ )
+ {}
+
+ TIOExecutorPool::~TIOExecutorPool() {
+ Threads.Destroy();
+ while (ThreadQueue.Pop(0))
+ ;
+ }
+
+ ui32 TIOExecutorPool::GetReadyActivation(TWorkerContext& wctx, ui64 revolvingCounter) {
+ ui32 workerId = wctx.WorkerId;
+ Y_VERIFY_DEBUG(workerId < PoolThreads);
+
+ NHPTimer::STime elapsed = 0;
+ NHPTimer::STime parked = 0;
+ NHPTimer::STime hpstart = GetCycleCountFast();
+ NHPTimer::STime hpnow;
+
+ const TAtomic x = AtomicDecrement(Semaphore);
+ if (x < 0) {
+ TThreadCtx& threadCtx = Threads[workerId];
+ ThreadQueue.Push(workerId + 1, revolvingCounter);
+ hpnow = GetCycleCountFast();
+ elapsed += hpnow - hpstart;
+ if (threadCtx.Pad.Park())
+ return 0;
+ hpstart = GetCycleCountFast();
+ parked += hpstart - hpnow;
+ }
+
+ while (!RelaxedLoad(&StopFlag)) {
+ if (const ui32 activation = Activations.Pop(++revolvingCounter)) {
+ hpnow = GetCycleCountFast();
+ elapsed += hpnow - hpstart;
+ wctx.AddElapsedCycles(IActor::ACTOR_SYSTEM, elapsed);
+ if (parked > 0) {
+ wctx.AddParkedCycles(parked);
+ }
+ return activation;
+ }
+ SpinLockPause();
+ }
+
+ return 0;
+ }
+
+ void TIOExecutorPool::Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) {
+ Schedule(deadline - ActorSystem->Timestamp(), ev, cookie, workerId);
+ }
+
+ void TIOExecutorPool::Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) {
+ Y_UNUSED(workerId);
+
+ const auto current = ActorSystem->Monotonic();
+ if (deadline < current)
+ deadline = current;
+
+ TTicketLock::TGuard guard(&ScheduleLock);
+ ScheduleQueue->Writer.Push(deadline.MicroSeconds(), ev.Release(), cookie);
+ }
+
+ void TIOExecutorPool::Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) {
+ Y_UNUSED(workerId);
+ const auto deadline = ActorSystem->Monotonic() + delta;
+
+ TTicketLock::TGuard guard(&ScheduleLock);
+ ScheduleQueue->Writer.Push(deadline.MicroSeconds(), ev.Release(), cookie);
+ }
+
+ void TIOExecutorPool::ScheduleActivationEx(ui32 activation, ui64 revolvingWriteCounter) {
+ Activations.Push(activation, revolvingWriteCounter);
+ const TAtomic x = AtomicIncrement(Semaphore);
+ if (x <= 0) {
+ for (;; ++revolvingWriteCounter) {
+ if (const ui32 x = ThreadQueue.Pop(revolvingWriteCounter)) {
+ const ui32 threadIdx = x - 1;
+ Threads[threadIdx].Pad.Unpark();
+ return;
+ }
+ SpinLockPause();
+ }
+ }
+ }
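+
+    // Wake-up pairing note: a worker that drives the semaphore negative in
+    // GetReadyActivation() first publishes its id via ThreadQueue.Push() and
+    // only then parks on its pad, so the producer loop above can always find
+    // exactly one parked (or parking) worker id to Unpark() per activation.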
+
+ void TIOExecutorPool::Prepare(TActorSystem* actorSystem, NSchedulerQueue::TReader** scheduleReaders, ui32* scheduleSz) {
+ TAffinityGuard affinityGuard(Affinity());
+
+ ActorSystem = actorSystem;
+
+ ScheduleQueue.Reset(new NSchedulerQueue::TQueueType());
+
+ for (ui32 i = 0; i != PoolThreads; ++i) {
+ Threads[i].Thread.Reset(new TExecutorThread(i, 0, actorSystem, this, MailboxTable.Get(), PoolName));
+ }
+
+ *scheduleReaders = &ScheduleQueue->Reader;
+ *scheduleSz = 1;
+ }
+
+ void TIOExecutorPool::Start() {
+ TAffinityGuard affinityGuard(Affinity());
+
+ for (ui32 i = 0; i != PoolThreads; ++i)
+ Threads[i].Thread->Start();
+ }
+
+ void TIOExecutorPool::PrepareStop() {
+ AtomicStore(&StopFlag, true);
+ for (ui32 i = 0; i != PoolThreads; ++i)
+ Threads[i].Pad.Interrupt();
+ }
+
+ void TIOExecutorPool::Shutdown() {
+ for (ui32 i = 0; i != PoolThreads; ++i)
+ Threads[i].Thread->Join();
+ }
+
+ void TIOExecutorPool::GetCurrentStats(TExecutorPoolStats& /*poolStats*/, TVector<TExecutorThreadStats>& statsCopy) const {
+ statsCopy.resize(PoolThreads + 1);
+ // Save counters from the pool object
+ statsCopy[0] = TExecutorThreadStats();
+ statsCopy[0].Aggregate(Stats);
+ // Per-thread stats
+ for (size_t i = 0; i < PoolThreads; ++i) {
+ Threads[i].Thread->GetCurrentStats(statsCopy[i + 1]);
+ }
+ }
+
+ TString TIOExecutorPool::GetName() const {
+ return PoolName;
+ }
+}
diff --git a/library/cpp/actors/core/executor_pool_io.h b/library/cpp/actors/core/executor_pool_io.h
new file mode 100644
index 0000000000..e576d642a1
--- /dev/null
+++ b/library/cpp/actors/core/executor_pool_io.h
@@ -0,0 +1,49 @@
+#pragma once
+
+#include "actorsystem.h"
+#include "executor_thread.h"
+#include "scheduler_queue.h"
+#include "executor_pool_base.h"
+#include <library/cpp/actors/util/ticket_lock.h>
+#include <library/cpp/actors/util/unordered_cache.h>
+#include <library/cpp/actors/util/threadparkpad.h>
+#include <util/system/condvar.h>
+
+namespace NActors {
+ class TIOExecutorPool: public TExecutorPoolBase {
+ struct TThreadCtx {
+ TAutoPtr<TExecutorThread> Thread;
+ TThreadParkPad Pad;
+ };
+
+ TArrayHolder<TThreadCtx> Threads;
+ TUnorderedCache<ui32, 512, 4> ThreadQueue;
+
+ THolder<NSchedulerQueue::TQueueType> ScheduleQueue;
+ TTicketLock ScheduleLock;
+
+ const TString PoolName;
+
+ public:
+ TIOExecutorPool(ui32 poolId, ui32 threads, const TString& poolName = "", TAffinity* affinity = nullptr,
+ ui32 maxActivityType = 1);
+ explicit TIOExecutorPool(const TIOExecutorPoolConfig& cfg);
+ ~TIOExecutorPool();
+
+ ui32 GetReadyActivation(TWorkerContext& wctx, ui64 revolvingCounter) override;
+
+ void Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) override;
+ void Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) override;
+ void Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) override;
+
+ void ScheduleActivationEx(ui32 activation, ui64 revolvingWriteCounter) override;
+
+ void Prepare(TActorSystem* actorSystem, NSchedulerQueue::TReader** scheduleReaders, ui32* scheduleSz) override;
+ void Start() override;
+ void PrepareStop() override;
+ void Shutdown() override;
+
+ void GetCurrentStats(TExecutorPoolStats& poolStats, TVector<TExecutorThreadStats>& statsCopy) const override;
+ TString GetName() const override;
+ };
+}
diff --git a/library/cpp/actors/core/executor_pool_united.cpp b/library/cpp/actors/core/executor_pool_united.cpp
new file mode 100644
index 0000000000..dac6245635
--- /dev/null
+++ b/library/cpp/actors/core/executor_pool_united.cpp
@@ -0,0 +1,1428 @@
+#include "executor_pool_united.h"
+
+#include "balancer.h"
+#include "cpu_state.h"
+#include "executor_thread.h"
+#include "probes.h"
+#include "mailbox.h"
+#include "scheduler_queue.h"
+#include <library/cpp/actors/util/affinity.h>
+#include <library/cpp/actors/util/datetime.h>
+#include <library/cpp/actors/util/futex.h>
+#include <library/cpp/actors/util/intrinsics.h>
+#include <library/cpp/actors/util/timerfd.h>
+
+#include <util/system/datetime.h>
+#include <util/system/hp_timer.h>
+
+#include <algorithm>
+
+namespace NActors {
+ LWTRACE_USING(ACTORLIB_PROVIDER);
+
+ struct TUnitedWorkers::TWorker: public TNonCopyable {
+ TAutoPtr<TExecutorThread> Thread;
+ volatile TThreadId ThreadId = UnknownThreadId;
+ NSchedulerQueue::TQueueType SchedulerQueue;
+ };
+
+ struct TUnitedWorkers::TPool: public TNonCopyable {
+ TAtomic Waiters = 0; // Number of idle cpus, waiting for activations in this pool
+ char Padding[64 - sizeof(TAtomic)];
+
+ TUnorderedCache<ui32, 512, 4> Activations; // MPMC-queue for mailbox activations
+ TAtomic Active = 0; // Number of mailboxes ready for execution or currently executing
+ TAtomic Tokens = 0; // Pending tokens (token is required for worker to start execution, guarantees concurrency limit and activation availability)
+ volatile bool StopFlag = false;
+
+ // Configuration
+ TPoolId PoolId;
+ TAtomicBase Concurrency; // Max concurrent workers running this pool
+ IExecutorPool* ExecutorPool;
+ TMailboxTable* MailboxTable;
+ ui64 TimePerMailboxTs;
+ ui32 EventsPerMailbox;
+
+ // Cpus this pool is allowed to run on
+ // Cpus are specified in wake order
+ TStackVec<TCpu*, 15> WakeOrderCpus;
+
+ ~TPool() {
+ while (Activations.Pop(0)) {}
+ }
+
+ void Stop() {
+ AtomicStore(&StopFlag, true);
+ }
+
+ bool IsUnited() const {
+ return WakeOrderCpus.size();
+ }
+
+        // Add an activation for a newly scheduled mailbox. Returns true if a token was generated (i.e. the concurrency limit was not exceeded)
+ bool PushActivation(ui32 activation, ui64 revolvingCounter) {
+ Activations.Push(activation, revolvingCounter);
+ TAtomicBase active = AtomicIncrement(Active);
+ if (active <= Concurrency) { // token generated
+ AtomicIncrement(Tokens);
+ return true;
+ }
+ return false;
+ }
+
+ template <bool Relaxed>
+ static bool TryAcquireTokenImpl(TAtomic* tokens) {
+ while (true) {
+ TAtomicBase value;
+ if constexpr (Relaxed) {
+ value = RelaxedLoad(tokens);
+ } else {
+ value = AtomicLoad(tokens);
+ }
+ if (value > 0) {
+ if (AtomicCas(tokens, value - 1, value)) {
+ return true; // token acquired
+ }
+ } else {
+ return false; // no more tokens
+ }
+ }
+ }
+
+ // Try acquire pending token. Must be done before execution
+ bool TryAcquireToken() {
+ return TryAcquireTokenImpl<false>(&Tokens);
+ }
+
+ // Try acquire pending token. Must be done before execution
+ bool TryAcquireTokenRelaxed() {
+ return TryAcquireTokenImpl<true>(&Tokens);
+ }
+
+ // Get activation. Requires acquired token.
+ void BeginExecution(ui32& activation, ui64 revolvingCounter) {
+ while (!RelaxedLoad(&StopFlag)) {
+ if (activation = Activations.Pop(++revolvingCounter)) {
+ return;
+ }
+ SpinLockPause();
+ }
+ activation = 0; // should stop
+ }
+
+ // End currently active execution and start new one if token is available.
+ // Reuses token if it's not destroyed.
+ // Returned `true` means successful switch, `activation` is filled.
+ // Returned `false` means execution has ended, no need to call StopExecution()
+ bool NextExecution(ui32& activation, ui64 revolvingCounter) {
+ if (AtomicDecrement(Active) >= Concurrency) { // reuse just released token
+ BeginExecution(activation, revolvingCounter);
+ return true;
+ } else if (TryAcquireToken()) { // another token acquired
+ BeginExecution(activation, revolvingCounter);
+ return true;
+ }
+ return false; // no more tokens available
+ }
+
+        // Stop the active execution. Returns true if a token was released (false if it was destroyed)
+ bool StopExecution() {
+ TAtomicBase active = AtomicDecrement(Active);
+ if (active >= Concurrency) { // token released
+ AtomicIncrement(Tokens);
+ return true;
+ }
+ return false; // token destroyed
+ }
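+
+        // Accounting example (illustrative): with Concurrency == 2, pushing
+        // three activations leaves Active == 3 and Tokens == 2, so only two
+        // workers run concurrently. When one of them calls NextExecution(),
+        // AtomicDecrement(Active) returns 2 >= Concurrency and the token it
+        // holds is reused in place for the third activation.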
+
+ // Switch worker context into this pool
+ void Switch(TWorkerContext& wctx, ui64 softDeadlineTs, TExecutorThreadStats& stats) {
+ wctx.Switch(ExecutorPool, MailboxTable, TimePerMailboxTs, EventsPerMailbox, softDeadlineTs, &stats);
+ }
+ };
+
+ class TPoolScheduler {
+ class TSchedulable {
+ // Lower PoolBits store PoolId
+ // All other higher bits store virtual runtime in cycles
+ using TValue = ui64;
+ TValue Value;
+
+ static constexpr ui64 PoolIdMask = ui64((1ull << PoolBits) - 1);
+ static constexpr ui64 VRunTsMask = ~PoolIdMask;
+
+ public:
+ explicit TSchedulable(TPoolId poolId = MaxPools, ui64 vrunts = 0)
+ : Value((poolId & PoolIdMask) | (vrunts & VRunTsMask))
+ {}
+
+ TPoolId GetPoolId() const {
+ return Value & PoolIdMask;
+ }
+
+ ui64 GetVRunTs() const {
+ // Do not truncate pool id
+                // NOTE: it decreases accuracy, but improves performance
+ return Value;
+ }
+
+ ui64 GetPreciseVRunTs() const {
+ return Value & VRunTsMask;
+ }
+
+ void SetVRunTs(ui64 vrunts) {
+ Value = (Value & PoolIdMask) | (vrunts & VRunTsMask);
+ }
+
+ void Account(ui64 base, ui64 ts) {
+ // Add at least minimum amount to change Value
+ SetVRunTs(base + Max(ts, PoolIdMask + 1));
+ }
+ };
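+
+        // Worked example of the packing (assuming, for illustration only,
+        // PoolBits == 4, so PoolIdMask == 0xF): TSchedulable(3, 0x120) stores
+        // Value == 0x123; GetPoolId() yields 3 and GetPreciseVRunTs() yields
+        // 0x120. GetVRunTs() keeps the low pool-id bits on purpose, trading
+        // at most PoolIdMask cycles of comparison accuracy for one fewer mask
+        // operation on the hot path.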
+
+ // For min-heap of Items
+ struct TCmp {
+ bool operator()(TSchedulable lhs, TSchedulable rhs) const {
+ return lhs.GetVRunTs() > rhs.GetVRunTs();
+ }
+ };
+
+ TPoolId Size = 0; // total number of pools on this cpu
+ TPoolId Current = 0; // index of current pool in `Items`
+
+        // At the beginning `Current` items are organized as a binary min-heap -- ready to be scheduled
+ // The rest `Size - Current` items are unordered (required to keep track of last vrunts)
+ TSchedulable Items[MaxPools]; // virtual runtime in cycles for each pool
+ ui64 MinVRunTs = 0; // virtual runtime used by waking pools (system's vrunts)
+ ui64 Ts = 0; // real timestamp of current execution start (for accounting)
+
+        // Maps PoolId to its inverse weight
+ ui64 InvWeights[MaxPools];
+ static constexpr ui64 VRunTsOverflow = ui64(1ull << 62ull) / MaxPoolWeight;
+
+ public:
+ void AddPool(TPoolId pool, TPoolWeight weight) {
+ Items[Size] = TSchedulable(pool, MinVRunTs);
+ Size++;
+ InvWeights[pool] = MaxPoolWeight / std::clamp(weight ? weight : DefPoolWeight, MinPoolWeight, MaxPoolWeight);
+ }
+
+ // Iterate over pools in scheduling order
+        // intended to be used in a construction like:
+ // for (TPoolId pool = Begin(); pool != End(); pool = Next())
+ TPoolId Begin() {
+ // Wrap vruntime around to avoid overflow, if required
+ if (Y_UNLIKELY(MinVRunTs >= VRunTsOverflow)) {
+ for (TPoolId i = 0; i < Size; i++) {
+ ui64 ts = Items[i].GetPreciseVRunTs();
+ Items[i].SetVRunTs(ts >= VRunTsOverflow ? ts - VRunTsOverflow : 0);
+ }
+ MinVRunTs -= VRunTsOverflow;
+ }
+ Current = Size;
+ std::make_heap(Items, Items + Current, TCmp());
+ return Next();
+ }
+
+ constexpr TPoolId End() const {
+ return MaxPools;
+ }
+
+ TPoolId Next() {
+ if (Current > 0) {
+ std::pop_heap(Items, Items + Current, TCmp());
+ Current--;
+ return CurrentPool();
+ } else {
+ return End();
+ }
+ }
+
+ // Scheduling was successful, we are going to run CurrentPool()
+ void Scheduled() {
+ MinVRunTs = Max(MinVRunTs, Items[Current].GetPreciseVRunTs());
+ // NOTE: Ts is propagated on Account() to avoid gaps
+ }
+
+ // Schedule specific pool that woke up cpu after idle
+ void ScheduledAfterIdle(TPoolId pool, ui64 ts) {
+ if (Y_UNLIKELY(ts < Ts)) { // anomaly: time goes backwards (e.g. rdtsc is reset to zero on cpu reset)
+ Ts = ts; // just skip anomalous time slice
+ return;
+ }
+ MinVRunTs += (ts - Ts) * (MaxPoolWeight / DefPoolWeight); // propagate system's vrunts to blur difference between pools
+ Ts = ts; // propagate time w/o accounting to any pool
+
+ // Set specified pool as current, it requires scan
+ for (Current = 0; Current < Size && pool != Items[Current].GetPoolId(); Current++) {}
+ Y_VERIFY(Current < Size);
+ }
+
+ // Account currently running pool till now (ts)
+ void Account(ui64 ts) {
+ // Skip time slice for the first run and when time goes backwards (e.g. rdtsc is reset to zero on cpu reset)
+ if (Y_LIKELY(Ts > 0 && Ts <= ts)) {
+ TPoolId pool = CurrentPool();
+ Y_VERIFY(pool < MaxPools);
+ Items[Current].Account(MinVRunTs, (ts - Ts) * InvWeights[pool]);
+ }
+ Ts = ts; // propagate time
+ }
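+
+        // Weighting example (illustrative): a pool configured with twice the
+        // default weight gets half the default InvWeights entry, so the same
+        // time slice advances its vruntime half as fast and the min-heap
+        // selects it roughly twice as often; the scheme is analogous to CFS
+        // vruntime scheduling in the Linux kernel.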
+
+ TPoolId CurrentPool() const {
+ return Items[Current].GetPoolId();
+ }
+ };
+
+ // Cyclic array of timers for idle workers to wait for hard preemption on
+ struct TIdleQueue: public TNonCopyable {
+ TArrayHolder<TTimerFd> Timers;
+ size_t Size;
+ TAtomic EnqueueCounter = 0;
+ TAtomic DequeueCounter = 0;
+
+ explicit TIdleQueue(size_t size)
+ : Timers(new TTimerFd[size])
+ , Size(size)
+ {}
+
+ void Stop() {
+ for (size_t i = 0; i < Size; i++) {
+ Timers[i].Wake();
+ }
+ }
+
+ // Returns timer which new idle-worker should wait for
+ TTimerFd* Enqueue() {
+ return &Timers[AtomicGetAndIncrement(EnqueueCounter) % Size];
+ }
+
+ // Returns timer that hard preemption should trigger to wake idle-worker
+ TTimerFd* Dequeue() {
+ return &Timers[AtomicGetAndIncrement(DequeueCounter) % Size];
+ }
+ };
+
+    // Base class for cpu-local managers that help workers on a single cpu cooperate
+ struct TCpuLocalManager: public TThrRefBase {
+ TUnitedWorkers* United;
+
+ explicit TCpuLocalManager(TUnitedWorkers* united)
+ : United(united)
+ {}
+
+ virtual TWorkerId WorkerCount() const = 0;
+ virtual void AddWorker(TWorkerId workerId) = 0;
+ virtual void Stop() = 0;
+ };
+
+    // Represents a cpu with a single associated worker that is able to execute any pool.
+    // It always executes the pool assigned by the balancer and switches pools only if the assignment has changed.
+ struct TAssignedCpu: public TCpuLocalManager {
+ bool Started = false;
+
+ TAssignedCpu(TUnitedWorkers* united)
+ : TCpuLocalManager(united)
+ {}
+
+ TWorkerId WorkerCount() const override {
+ return 1;
+ }
+
+ void AddWorker(TWorkerId workerId) override {
+ Y_UNUSED(workerId);
+ }
+
+ ui32 GetReadyActivation(TWorkerContext& wctx, ui64 revolvingCounter) {
+ ui32 activation;
+ if (Y_UNLIKELY(!Started)) {
+ Started = true;
+ } else if (Y_UNLIKELY(United->IsPoolReassigned(wctx))) {
+ United->StopExecution(wctx.PoolId); // stop current execution and switch pool if reassigned
+ } else if (United->NextExecution(wctx.PoolId, activation, revolvingCounter)) {
+ return activation; // another activation from currently executing pool (or 0 if stopped)
+ }
+
+ // Switch to another pool, it blocks until token is acquired
+ if (Y_UNLIKELY(!SwitchPool(wctx))) {
+ return 0; // stopped
+ }
+ United->SwitchPool(wctx, 0);
+ United->BeginExecution(wctx.PoolId, activation, revolvingCounter);
+ return activation;
+ }
+
+ void Stop() override {
+ }
+
+ private:
+ // Sets next pool to run, and acquires token, blocks if there are no tokens
+ bool SwitchPool(TWorkerContext& wctx) {
+ if (Y_UNLIKELY(United->IsStopped())) {
+ return false;
+ }
+
+ // Run balancer (if it's time to)
+ United->Balance();
+
+ // Select pool to execute
+ wctx.PoolId = United->AssignedPool(wctx);
+ Y_VERIFY(wctx.PoolId != CpuShared);
+ if (United->TryAcquireToken(wctx.PoolId)) {
+ return true;
+ }
+
+ // No more work -- wait for activations (spinning, then blocked)
+ wctx.PoolId = United->Idle(wctx.PoolId, wctx);
+
+            // A wakeup or stop has occurred
+ if (Y_UNLIKELY(wctx.PoolId == CpuStopped)) {
+ return false;
+ }
+ return true; // United->Idle() has already acquired token
+ }
+ };
+
+    // Lock-free data structure that helps workers on a single cpu discover their state and perform hard preemptions
+ struct TSharedCpu: public TCpuLocalManager {
+ // Current lease
+ volatile TLease::TValue CurrentLease;
+ char Padding1[64 - sizeof(TLease)];
+
+ // Slow pools
+        // the highest bit: 1=wait-for-slow-workers mode, 0=normal operation
+        // any lower bit (poolId is bit position): 1=pool-is-slow 0=pool-is-fast
+ volatile TPoolsMask SlowPoolsMask = 0;
+ char Padding2[64 - sizeof(TPoolsMask)];
+
+ // Must be accessed under never expiring lease to avoid races
+ TPoolScheduler PoolSched;
+ TWorkerId FastWorker = MaxWorkers;
+ TTimerFd* PreemptionTimer = nullptr;
+ ui64 HardPreemptionTs = 0;
+ bool Started = false;
+
+ TIdleQueue IdleQueue;
+
+ struct TConfig {
+ const TCpuId CpuId;
+ const TWorkerId Workers;
+ ui64 SoftLimitTs;
+ ui64 HardLimitTs;
+ ui64 EventLimitTs;
+ ui64 LimitPrecisionTs;
+ const int IdleWorkerPriority;
+ const int FastWorkerPriority;
+ const bool NoRealtime;
+ const bool NoAffinity;
+ const TCpuAllocation CpuAlloc;
+
+ TConfig(const TCpuAllocation& allocation, const TUnitedWorkersConfig& united)
+ : CpuId(allocation.CpuId)
+ , Workers(allocation.AllowedPools.size() + 1)
+ , SoftLimitTs(Us2Ts(united.PoolLimitUs))
+ , HardLimitTs(Us2Ts(united.PoolLimitUs + united.EventLimitUs))
+ , EventLimitTs(Us2Ts(united.EventLimitUs))
+ , LimitPrecisionTs(Us2Ts(united.LimitPrecisionUs))
+ , IdleWorkerPriority(std::clamp<ui64>(united.IdleWorkerPriority ? united.IdleWorkerPriority : 20, 1, 99))
+ , FastWorkerPriority(std::clamp<ui64>(united.FastWorkerPriority ? united.FastWorkerPriority : 10, 1, IdleWorkerPriority - 1))
+ , NoRealtime(united.NoRealtime)
+ , NoAffinity(united.NoAffinity)
+ , CpuAlloc(allocation)
+ {}
+ };
+
+ TConfig Config;
+ TVector<TWorkerId> Workers;
+
+ TSharedCpu(const TConfig& cfg, TUnitedWorkers* united)
+ : TCpuLocalManager(united)
+ , IdleQueue(cfg.Workers)
+ , Config(cfg)
+ {
+ for (const auto& pa : Config.CpuAlloc.AllowedPools) {
+ PoolSched.AddPool(pa.PoolId, pa.Weight);
+ }
+ }
+
+ TWorkerId WorkerCount() const override {
+ return Config.Workers;
+ }
+
+ void AddWorker(TWorkerId workerId) override {
+ if (Workers.empty()) {
+ // Grant lease to the first worker
+ AtomicStore(&CurrentLease, TLease(workerId, NeverExpire).Value);
+ }
+ Workers.push_back(workerId);
+ }
+
+ ui32 GetReadyActivation(TWorkerContext& wctx, ui64 revolvingCounter) {
+ ui32 activation;
+ if (!wctx.Lease.IsNeverExpiring()) {
+ if (wctx.SoftDeadlineTs < GetCycleCountFast()) { // stop if the lease has expired or is about to expire
+ United->StopExecution(wctx.PoolId);
+ } else if (United->NextExecution(wctx.PoolId, activation, revolvingCounter)) {
+ return activation; // another activation from currently executing pool (or 0 if stopped)
+ }
+ }
+
+ // Switch to another pool; blocks until a token is acquired
+ if (Y_UNLIKELY(!SwitchPool(wctx))) {
+ return 0; // stopped
+ }
+ United->BeginExecution(wctx.PoolId, activation, revolvingCounter);
+ return activation;
+ }
+
+ void Stop() override {
+ IdleQueue.Stop();
+ }
+
+ private:
+ enum EPriority {
+ IdlePriority, // highest (real-time, Config.IdleWorkerPriority)
+ FastPriority, // normal (real-time, Config.FastWorkerPriority)
+ SlowPriority, // lowest (not real-time)
+ };
+
+ enum EWorkerAction {
+ // Fast-worker
+ ExecuteFast,
+ WaitForSlow,
+
+ // Slow-worker
+ BecameIdle,
+ WakeFast,
+
+ // Idle-worker
+ BecameFast,
+ Standby,
+
+ // Common
+ Stopped,
+ };
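+
+ // Worker role transitions driven by SwitchPool() below (a sketch):
+ //   fast -> slow: the lease expires and an idle-worker hard-preempts the holder
+ //   slow -> idle: BecameIdle/WakeFast once the slow event finally finishes
+ //   idle -> fast: BecameFast after a successful hard preemption
+ // At any moment exactly one worker holds the current lease and acts as fast-worker.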
+
+ // Thread-safe; should be called from worker
+ // Blocks for idle-workers; sets lease and next pool to run
+ bool SwitchPool(TWorkerContext& wctx) {
+ TTimerFd* idleTimer = nullptr;
+ while (true) {
+ if (DisablePreemptionAndTryExtend(wctx.Lease)) { // if fast-worker
+ if (Y_UNLIKELY(!Started)) {
+ SetPriority(0, FastPriority);
+ Started = true;
+ }
+ while (true) {
+ switch (FastWorkerAction(wctx)) {
+ case ExecuteFast:
+ United->SwitchPool(wctx, wctx.Lease.GetPreciseExpireTs() - Config.EventLimitTs);
+ EnablePreemptionAndGrant(wctx.Lease);
+ return true;
+ case WaitForSlow:
+ FastWorkerSleep(GetCycleCountFast() + Config.SoftLimitTs);
+ break;
+ case Stopped: return false;
+ default: Y_FAIL();
+ }
+ }
+ } else if (wctx.Lease.IsNeverExpiring()) { // if idle-worker
+ switch (IdleWorkerAction(idleTimer, wctx.Lease.GetWorkerId())) {
+ case BecameFast:
+ SetPriority(0, FastPriority);
+ break; // try acquire new lease
+ case Standby:
+ if (!idleTimer) {
+ idleTimer = IdleQueue.Enqueue();
+ }
+ SetPriority(0, IdlePriority);
+ idleTimer->Wait();
+ break;
+ case Stopped: return false;
+ default: Y_FAIL();
+ }
+ } else { // the lease has expired and hard preemption has occurred, so we are executing as a slow-worker
+ wctx.IncrementPreemptedEvents();
+ switch (SlowWorkerAction(wctx.PoolId)) {
+ case WakeFast:
+ WakeFastWorker();
+ [[fallthrough]]; // intentionally fall through to BecameIdle
+ case BecameIdle:
+ wctx.Lease = wctx.Lease.NeverExpire();
+ wctx.PoolId = MaxPools;
+ idleTimer = nullptr;
+ break;
+ case Stopped: return false;
+ default: Y_FAIL();
+ }
+ }
+ }
+ }
+
+ enum ETryRunPool {
+ RunFastPool,
+ RunSlowPool,
+ NoTokens,
+ };
+
+ ETryRunPool TryRun(TPoolId pool) {
+ while (true) {
+ // updates WaitPoolsFlag in SlowPoolsMask according to scheduled pool slowness
+ TPoolsMask slow = AtomicLoad(&SlowPoolsMask);
+ if ((1ull << pool) & slow) { // we are about to execute slow pool (fast-worker will just wait, token is NOT required)
+ if (slow & WaitPoolsFlag) {
+ return RunSlowPool; // wait flag is already set
+ } else {
+ if (AtomicCas(&SlowPoolsMask, slow | WaitPoolsFlag, slow)) { // try set wait flag
+ return RunSlowPool; // wait flag has been successfully set
+ }
+ }
+ } else { // we are about to execute fast pool, token required
+ if (slow & WaitPoolsFlag) { // reset wait flag if required
+ if (AtomicCas(&SlowPoolsMask, slow & ~WaitPoolsFlag, slow)) { // try reset wait flag
+ return United->TryAcquireToken(pool) ? RunFastPool : NoTokens; // wait flag has been successfully reset
+ }
+ } else {
+ return United->TryAcquireToken(pool) ? RunFastPool : NoTokens; // wait flag is already reset
+ }
+ }
+ }
+ }
+
+ EWorkerAction FastWorkerAction(TWorkerContext& wctx) {
+ if (Y_UNLIKELY(United->IsStopped())) {
+ return Stopped;
+ }
+
+ // Account current pool
+ ui64 ts = GetCycleCountFast();
+ PoolSched.Account(ts);
+
+ // Select next pool to execute
+ for (wctx.PoolId = PoolSched.Begin(); wctx.PoolId != PoolSched.End(); wctx.PoolId = PoolSched.Next()) {
+ switch (TryRun(wctx.PoolId)) {
+ case RunFastPool:
+ PoolSched.Scheduled();
+ wctx.Lease = PostponePreemption(wctx.Lease.GetWorkerId(), ts);
+ return ExecuteFast;
+ case RunSlowPool:
+ PoolSched.Scheduled();
+ ResetPreemption(wctx.Lease.GetWorkerId(), ts); // there is no point in preemption during wait
+ return WaitForSlow;
+ case NoTokens: // concurrency limit reached, or no more work in pool
+ break; // just try next pool (if any)
+ }
+ }
+
+ // No more work, no slow-workers -- wait for activations (active, then blocked)
+ wctx.PoolId = United->Idle(CpuShared, wctx);
+
+ // Wakeup or stop occurred
+ if (Y_UNLIKELY(wctx.PoolId == CpuStopped)) {
+ return Stopped;
+ }
+ ts = GetCycleCountFast();
+ PoolSched.ScheduledAfterIdle(wctx.PoolId, ts);
+ wctx.Lease = PostponePreemption(wctx.Lease.GetWorkerId(), ts);
+ return ExecuteFast; // United->Idle() has already acquired token
+ }
+
+ EWorkerAction IdleWorkerAction(TTimerFd* idleTimer, TWorkerId workerId) {
+ if (Y_UNLIKELY(United->IsStopped())) {
+ return Stopped;
+ }
+ if (!idleTimer) { // either worker start or became idle -- hard preemption is not required
+ return Standby;
+ }
+
+ TLease lease = TLease(AtomicLoad(&CurrentLease));
+ ui64 ts = GetCycleCountFast();
+ if (lease.GetExpireTs() < ts) { // current lease has expired
+ if (TryBeginHardPreemption(lease)) {
+ SetPoolIsSlowFlag(PoolSched.CurrentPool());
+ TWorkerId preempted = lease.GetWorkerId();
+ SetPriority(United->GetWorkerThreadId(preempted), SlowPriority);
+ LWPROBE(HardPreemption, Config.CpuId, PoolSched.CurrentPool(), preempted, workerId);
+ EndHardPreemption(workerId);
+ return BecameFast;
+ } else {
+ // The lease has just been changed, so there is no way we need preemption right now; no retry needed
+ return Standby;
+ }
+ } else {
+ // Lease has not expired yet (maybe never expiring lease)
+ return Standby;
+ }
+ }
+
+ EWorkerAction SlowWorkerAction(TPoolId pool) {
+ if (Y_UNLIKELY(United->IsStopped())) {
+ return Stopped;
+ }
+ while (true) {
+ TPoolsMask slow = AtomicLoad(&SlowPoolsMask);
+ if (slow & (1ull << pool)) {
+ if (slow == ((1ull << pool) | WaitPoolsFlag)) { // the last slow pool is about to become fast
+ if (AtomicCas(&SlowPoolsMask, 0, slow)) { // reset both pool-is-slow flag and WaitPoolsFlag
+ return WakeFast;
+ }
+ } else { // there are (a) several slow-workers, or (b) one slow-worker w/o a waiting fast-worker
+ if (AtomicCas(&SlowPoolsMask, slow & ~(1ull << pool), slow)) { // reset pool-is-slow flag
+ return BecameIdle;
+ }
+ }
+ } else {
+ // SlowWorkerAction has been called between TryBeginHardPreemption and SetPoolIsSlowFlag:
+ // the flag for this pool is not set yet, but we can be sure the pool is slow,
+ // because SlowWorkerAction has been called, which means the lease has expired
+ // and hard preemption has occurred.
+ // So just wait for the other worker to call SetPoolIsSlowFlag
+ LWPROBE(SlowWorkerActionRace, Config.CpuId, pool, slow);
+ }
+ }
+ }
+
+ void SetPoolIsSlowFlag(TPoolId pool) {
+ while (true) {
+ TPoolsMask slow = AtomicLoad(&SlowPoolsMask);
+ if ((slow & (1ull << pool)) == 0) { // if pool is fast
+ if (AtomicCas(&SlowPoolsMask, slow | (1ull << pool), slow)) { // set pool-is-slow flag
+ return;
+ }
+ } else {
+ Y_FAIL("two slow-workers executing the same pool on the same core");
+ return; // pool is already slow
+ }
+ }
+ }
+
+ bool TryBeginHardPreemption(TLease lease) {
+ return AtomicCas(&CurrentLease, HardPreemptionLease, lease);
+ }
+
+ void EndHardPreemption(TWorkerId to) {
+ ATOMIC_COMPILER_BARRIER();
+ if (!AtomicCas(&CurrentLease, TLease(to, NeverExpire), HardPreemptionLease)) {
+ Y_FAIL("hard preemption failed");
+ }
+ }
+
+ bool DisablePreemptionAndTryExtend(TLease lease) {
+ return AtomicCas(&CurrentLease, lease.NeverExpire(), lease);
+ }
+
+ void EnablePreemptionAndGrant(TLease lease) {
+ ATOMIC_COMPILER_BARRIER();
+ if (!AtomicCas(&CurrentLease, lease, lease.NeverExpire())) {
+ Y_FAIL("lease grant failed");
+ }
+ }
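+
+ // Lease CAS protocol (a sketch of the helpers above):
+ //  - DisablePreemptionAndTryExtend: the fast-worker swaps its lease for a
+ //    never-expiring one, so it cannot be hard-preempted while switching pools;
+ //  - EnablePreemptionAndGrant: restores the expiring lease before execution,
+ //    re-arming hard preemption by the idle-worker;
+ //  - TryBeginHardPreemption + EndHardPreemption: the idle-worker replaces an
+ //    expired lease with HardPreemptionLease, then installs a never-expiring
+ //    lease for itself and becomes the new fast-worker.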
+
+ void FastWorkerSleep(ui64 deadlineTs) {
+ while (true) {
+ TPoolsMask slow = AtomicLoad(&SlowPoolsMask);
+ if ((slow & WaitPoolsFlag) == 0) {
+ return; // woken by WakeFast action
+ }
+ ui64 ts = GetCycleCountFast();
+ if (deadlineTs <= ts) {
+ if (AtomicCas(&SlowPoolsMask, slow & ~WaitPoolsFlag, slow)) { // try reset wait flag
+ return; // wait flag has been successfully reset after timeout
+ }
+ } else { // should wait
+ ui64 timeoutNs = Ts2Ns(deadlineTs - ts);
+#ifdef _linux_
+ timespec timeout;
+ timeout.tv_sec = timeoutNs / 1'000'000'000;
+ timeout.tv_nsec = timeoutNs % 1'000'000'000;
+ SysFutex(FastWorkerFutex(), FUTEX_WAIT_PRIVATE, FastWorkerFutexValue(slow), &timeout, nullptr, 0);
+#else
+ NanoSleep(timeoutNs); // non-linux wake is not supported, cpu will go idle on slow -> fast switch
+#endif
+ }
+ }
+ }
+
+ void WakeFastWorker() {
+#ifdef _linux_
+ SysFutex(FastWorkerFutex(), FUTEX_WAKE_PRIVATE, 1, nullptr, nullptr, 0);
+#endif
+ }
+
+#ifdef _linux_
+ ui32* FastWorkerFutex() {
+ // We actually wait on the single highest bit, but the futex value size is 4 bytes on all platforms
+ static_assert(sizeof(TPoolsMask) >= 4, "cannot be used as futex value on linux");
+ return (ui32*)&SlowPoolsMask + 1; // higher 32 bits (little endian assumed)
+ }
+
+ ui32 FastWorkerFutexValue(TPoolsMask slow) {
+ return ui32(slow >> 32); // higher 32 bits
+ }
+#endif
+
+ void SetPriority(TThreadId tid, EPriority priority) {
+ if (Config.NoRealtime) {
+ return;
+ }
+#ifdef _linux_
+ int policy;
+ struct sched_param param;
+ switch (priority) {
+ case IdlePriority:
+ policy = SCHED_FIFO;
+ param.sched_priority = Config.IdleWorkerPriority;
+ break;
+ case FastPriority:
+ policy = SCHED_FIFO;
+ param.sched_priority = Config.FastWorkerPriority;
+ break;
+ case SlowPriority:
+ policy = SCHED_OTHER;
+ param.sched_priority = 0;
+ break;
+ }
+ int ret = sched_setscheduler(tid, policy, &param);
+ switch (ret) {
+ case 0: return;
+ case EINVAL:
+ Y_FAIL("sched_setscheduler(%" PRIu64 ", %d, %d) -> EINVAL", tid, policy, param.sched_priority);
+ case EPERM:
+ // Requirements:
+ // * CAP_SYS_NICE capability to run real-time processes and set cpu affinity.
+ // Either run under root or set application capabilities:
+ // sudo setcap cap_sys_nice=eip BINARY
+ // * Non-zero rt-runtime (in case cgroups are used).
+ // Either (a) disable global limit on RT processes bandwidth:
+ // sudo sysctl -w kernel.sched_rt_runtime_us=-1
+ // Or (b) set non-zero rt-runtime for your cgroup:
+ // echo -1 > /sys/fs/cgroup/cpu/[cgroup]/cpu.rt_runtime_us
+ // (also set the same value for every parent cgroup)
+ // https://www.kernel.org/doc/Documentation/scheduler/sched-rt-group.txt
+ Y_FAIL("sched_setscheduler(%" PRIu64 ", %d, %d) -> EPERM", tid, policy, param.sched_priority);
+ case ESRCH:
+ Y_FAIL("sched_setscheduler(%" PRIu64 ", %d, %d) -> ESRCH", tid, policy, param.sched_priority);
+ default:
+ Y_FAIL("sched_setscheduler(%" PRIu64 ", %d, %d) -> %d", tid, policy, param.sched_priority, ret);
+ }
+#else
+ Y_UNUSED(tid);
+ Y_UNUSED(priority);
+#endif
+ }
+
+ void ResetPreemption(TWorkerId fastWorkerId, ui64 ts) {
+ if (Y_UNLIKELY(!PreemptionTimer)) {
+ return;
+ }
+ if (FastWorker == fastWorkerId && HardPreemptionTs > 0) {
+ PreemptionTimer->Reset();
+ LWPROBE(ResetPreemptionTimer, Config.CpuId, FastWorker, PreemptionTimer->Fd, Ts2Ms(ts), Ts2Ms(HardPreemptionTs));
+ HardPreemptionTs = 0;
+ }
+ }
+
+ TLease PostponePreemption(TWorkerId fastWorkerId, ui64 ts) {
+ // Select new timer after hard preemption
+ if (FastWorker != fastWorkerId) {
+ FastWorker = fastWorkerId;
+ PreemptionTimer = IdleQueue.Dequeue();
+ HardPreemptionTs = 0;
+ }
+
+ ui64 hardPreemptionTs = ts + Config.HardLimitTs;
+ if (hardPreemptionTs > HardPreemptionTs) {
+ // Reset the timer (at most once per LimitPrecisionTs, sacrificing precision)
+ HardPreemptionTs = hardPreemptionTs + Config.LimitPrecisionTs;
+ PreemptionTimer->Set(HardPreemptionTs);
+ LWPROBE(SetPreemptionTimer, Config.CpuId, FastWorker, PreemptionTimer->Fd, Ts2Ms(ts), Ts2Ms(HardPreemptionTs));
+ }
+
+ return TLease(fastWorkerId, hardPreemptionTs);
+ }
+ };
+
+ // Proxy for starting and switching TUnitedExecutorPool-s on a single cpu via GetReadyActivation()
+ // (does not implement any other method of IExecutorPool)
+ class TCpuExecutorPool: public IExecutorPool {
+ const TString Name;
+
+ public:
+ explicit TCpuExecutorPool(const TString& name)
+ : IExecutorPool(MaxPools)
+ , Name(name)
+ {}
+
+ TString GetName() const override {
+ return Name;
+ }
+
+ void SetRealTimeMode() const override {
+ // derived classes control rt-priority, so do nothing here
+ }
+
+ // Should never be called
+ void ReclaimMailbox(TMailboxType::EType, ui32, TWorkerId, ui64) override { Y_FAIL(); }
+ void Schedule(TInstant, TAutoPtr<IEventHandle>, ISchedulerCookie*, TWorkerId) override { Y_FAIL(); }
+ void Schedule(TMonotonic, TAutoPtr<IEventHandle>, ISchedulerCookie*, TWorkerId) override { Y_FAIL(); }
+ void Schedule(TDuration, TAutoPtr<IEventHandle>, ISchedulerCookie*, TWorkerId) override { Y_FAIL(); }
+ bool Send(TAutoPtr<IEventHandle>&) override { Y_FAIL(); }
+ void ScheduleActivation(ui32) override { Y_FAIL(); }
+ void ScheduleActivationEx(ui32, ui64) override { Y_FAIL(); }
+ TActorId Register(IActor*, TMailboxType::EType, ui64, const TActorId&) override { Y_FAIL(); }
+ TActorId Register(IActor*, TMailboxHeader*, ui32, const TActorId&) override { Y_FAIL(); }
+ void Prepare(TActorSystem*, NSchedulerQueue::TReader**, ui32*) override { Y_FAIL(); }
+ void Start() override { Y_FAIL(); }
+ void PrepareStop() override { Y_FAIL(); }
+ void Shutdown() override { Y_FAIL(); }
+ bool Cleanup() override { Y_FAIL(); }
+ };
+
+ // Proxy executor pool working with cpu-local scheduler (aka actorsystem 2.0)
+ class TSharedCpuExecutorPool: public TCpuExecutorPool {
+ TSharedCpu* Local;
+ TIntrusivePtr<TAffinity> SingleCpuAffinity; // no migration support yet
+ public:
+ explicit TSharedCpuExecutorPool(TSharedCpu* local, const TUnitedWorkersConfig& config)
+ : TCpuExecutorPool("u-" + ToString(local->Config.CpuId))
+ , Local(local)
+ , SingleCpuAffinity(config.NoAffinity ? nullptr : new TAffinity(TCpuMask(local->Config.CpuId)))
+ {}
+
+ TAffinity* Affinity() const override {
+ return SingleCpuAffinity.Get();
+ }
+
+ ui32 GetReadyActivation(TWorkerContext& wctx, ui64 revolvingCounter) override {
+ return Local->GetReadyActivation(wctx, revolvingCounter);
+ }
+ };
+
+ // Proxy executor pool working with balancer and assigned pools (aka actorsystem 1.5)
+ class TAssignedCpuExecutorPool: public TCpuExecutorPool {
+ TAssignedCpu* Local;
+ TIntrusivePtr<TAffinity> CpuAffinity;
+ public:
+ explicit TAssignedCpuExecutorPool(TAssignedCpu* local, const TUnitedWorkersConfig& config)
+ : TCpuExecutorPool("United")
+ , Local(local)
+ , CpuAffinity(config.NoAffinity ? nullptr : new TAffinity(config.Allowed))
+ {}
+
+ TAffinity* Affinity() const override {
+ return CpuAffinity.Get();
+ }
+
+ ui32 GetReadyActivation(TWorkerContext& wctx, ui64 revolvingCounter) override {
+ return Local->GetReadyActivation(wctx, revolvingCounter);
+ }
+ };
+
+ // Representation of a single cpu and its state visible to other cpus and pools
+ struct TUnitedWorkers::TCpu: public TNonCopyable {
+ struct TScopedWaiters {
+ TCpu& Cpu;
+ TPool* AssignedPool; // nullptr if CpuShared
+
+ // Subscribe on wakeups from allowed pools
+ TScopedWaiters(TCpu& cpu, TPool* assignedPool) : Cpu(cpu), AssignedPool(assignedPool) {
+ if (!AssignedPool) {
+ for (TPool* pool : Cpu.AllowedPools) {
+ AtomicIncrement(pool->Waiters);
+ }
+ } else {
+ AtomicIncrement(AssignedPool->Waiters);
+ }
+ }
+
+ // Unsubscribe from pools we've subscribed on
+ ~TScopedWaiters() {
+ if (!AssignedPool) {
+ for (TPool* pool : Cpu.AllowedPools) {
+ AtomicDecrement(pool->Waiters);
+ }
+ } else {
+ AtomicDecrement(AssignedPool->Waiters);
+ }
+ }
+ };
+
+ // Current cpu state important for other cpus and balancer
+ TCpuState State;
+
+ // Thread-safe per pool stats
+ // NOTE: It's guaranteed that a cpu never executes two instances of the same pool
+ TVector<TExecutorThreadStats> PoolStats;
+
+ // Configuration
+ TCpuId CpuId;
+ THolder<TCpuLocalManager> LocalManager;
+ THolder<TCpuExecutorPool> ExecutorPool;
+
+ // Pools allowed to run on this cpu
+ TStackVec<TPool*, 15> AllowedPools;
+
+ void Stop() {
+ if (LocalManager) {
+ State.Stop();
+ LocalManager->Stop();
+ }
+ }
+
+ bool StartSpinning(TUnitedWorkers* united, TPool* assignedPool, TPoolId& result) {
+ // Mark cpu as idle
+ if (Y_UNLIKELY(!State.StartSpinning())) {
+ result = CpuStopped;
+ return true;
+ }
+
+ // Avoid using multiple atomic seq_cst loads in the loop: use a barrier once and relaxed ops
+ AtomicBarrier();
+
+ // Check there are no pending tokens (they can be released before the Waiters increment)
+ if (!assignedPool) {
+ for (TPool* pool : AllowedPools) {
+ if (pool->TryAcquireTokenRelaxed()) {
+ result = WakeWithTokenAcquired(united, pool->PoolId);
+ return true; // token acquired or stop
+ }
+ }
+ } else {
+ if (assignedPool->TryAcquireTokenRelaxed()) {
+ result = WakeWithTokenAcquired(united, assignedPool->PoolId);
+ return true; // token acquired or stop
+ }
+ }
+
+ // At this point we can be sure wakeup won't be lost
+ // So we can actively spin or block w/o checking for pending tokens
+ return false;
+ }
+
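+ // Spins until a wakeup switches CurrentPool away from CpuSpinning; the clock is
+ // read only once per 12 pause iterations to keep the hot spin loop cheap.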
+ bool ActiveWait(ui64 spinThresholdTs, TPoolId& result) {
+ ui64 deadline = GetCycleCountFast() + spinThresholdTs;
+ while (GetCycleCountFast() < deadline) {
+ for (ui32 i = 0; i < 12; ++i) {
+ TPoolId current = State.CurrentPool();
+ if (current == CpuSpinning) {
+ SpinLockPause();
+ } else {
+ result = current;
+ return true; // wakeup
+ }
+ }
+ }
+ return false; // spin threshold exceeded, no wakeups
+ }
+
+ bool StartBlocking(TPoolId& result) {
+ // Switch into blocked state
+ if (State.StartBlocking()) {
+ result = State.CurrentPool();
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ bool BlockedWait(TPoolId& result, ui64 timeoutNs) {
+ return State.Block(timeoutNs, result);
+ }
+
+ void SwitchPool(TPoolId pool) {
+ return State.SwitchPool(pool);
+ }
+
+ private:
+ TPoolId WakeWithTokenAcquired(TUnitedWorkers* united, TPoolId token) {
+ switch (State.WakeWithTokenAcquired(token)) {
+ case TCpuState::Woken: // we've got token and successfully woken up this cpu
+ // NOTE: the sending thread may also wake another worker, which won't be able to acquire a token and will go idle (that's ok)
+ return token;
+ case TCpuState::NotIdle: { // a wakeup event has also occurred
+ TPoolId wakeup = State.CurrentPool();
+ if (wakeup != token) { // token and wakeup for different pools
+ united->TryWake(wakeup); // rewake another cpu to avoid losing wakeup
+ }
+ return token;
+ }
+ case TCpuState::Forbidden:
+ Y_FAIL();
+ case TCpuState::Stopped:
+ return CpuStopped;
+ }
+ }
+ };
+
+ TUnitedWorkers::TUnitedWorkers(
+ const TUnitedWorkersConfig& config,
+ const TVector<TUnitedExecutorPoolConfig>& unitedPools,
+ const TCpuAllocationConfig& allocation,
+ IBalancer* balancer)
+ : Balancer(balancer)
+ , Config(config)
+ , Allocation(allocation)
+ {
+ // Find max pool id and initialize pools
+ PoolCount = 0;
+ for (const TCpuAllocation& cpuAlloc : allocation.Items) {
+ for (const auto& pa : cpuAlloc.AllowedPools) {
+ PoolCount = Max<size_t>(PoolCount, pa.PoolId + 1);
+ }
+ }
+ Pools.Reset(new TPool[PoolCount]);
+
+ // Find max cpu id and initialize cpus
+ CpuCount = 0;
+ for (const TCpuAllocation& cpuAlloc : allocation.Items) {
+ CpuCount = Max<size_t>(CpuCount, cpuAlloc.CpuId + 1);
+ }
+ Cpus.Reset(new TCpu[CpuCount]);
+
+ // Setup allocated cpus
+ // NOTE: leave gaps for not allocated cpus (default-initialized)
+ WorkerCount = 0;
+ for (const TCpuAllocation& cpuAlloc : allocation.Items) {
+ TCpu& cpu = Cpus[cpuAlloc.CpuId];
+ cpu.CpuId = cpuAlloc.CpuId;
+ cpu.PoolStats.resize(PoolCount); // NOTE: also may have gaps
+ for (const auto& pa : cpuAlloc.AllowedPools) {
+ cpu.AllowedPools.emplace_back(&Pools[pa.PoolId]);
+ }
+
+ // Setup balancing and cpu-local manager
+ if (!Balancer->AddCpu(cpuAlloc, &cpu.State)) {
+ cpu.State.SwitchPool(0); // set initial state to non-idle to avoid losing wakeups on start
+ cpu.State.AssignPool(CpuShared);
+ TSharedCpu* local = new TSharedCpu(TSharedCpu::TConfig(cpuAlloc, Config), this);
+ cpu.LocalManager.Reset(local);
+ cpu.ExecutorPool.Reset(new TSharedCpuExecutorPool(local, Config));
+ } else {
+ TAssignedCpu* local = new TAssignedCpu(this);
+ cpu.LocalManager.Reset(local);
+ cpu.ExecutorPool.Reset(new TAssignedCpuExecutorPool(local, Config));
+ }
+ WorkerCount += cpu.LocalManager->WorkerCount();
+ }
+
+ // Initialize workers
+ Workers.Reset(new TWorker[WorkerCount]);
+
+ // Setup pools
+ // NOTE: leave gaps for not united pools (default-initialized)
+ for (const TUnitedExecutorPoolConfig& cfg : unitedPools) {
+ TPool& pool = Pools[cfg.PoolId];
+ Y_VERIFY(cfg.PoolId < MaxPools);
+ pool.PoolId = cfg.PoolId;
+ pool.Concurrency = cfg.Concurrency ? cfg.Concurrency : Config.CpuCount;
+ pool.ExecutorPool = nullptr; // should be set later using SetupPool()
+ pool.MailboxTable = nullptr; // should be set later using SetupPool()
+ pool.TimePerMailboxTs = DurationToCycles(cfg.TimePerMailbox);
+ pool.EventsPerMailbox = cfg.EventsPerMailbox;
+
+ // Reinitialize per cpu pool stats with right MaxActivityType
+ for (const TCpuAllocation& cpuAlloc : allocation.Items) {
+ TCpu& cpu = Cpus[cpuAlloc.CpuId];
+ cpu.PoolStats[cfg.PoolId] = TExecutorThreadStats(cfg.MaxActivityType);
+ }
+
+ // Setup WakeOrderCpus: left-to-right exclusive cpus, then left-to-right shared cpus.
+ // Waking exclusive cpus first reduces load on shared cpus and improves latency isolation,
+ // which is the point of using exclusive cpus. Note, however, that the number of actively
+ // spinning idle cpus may increase, so cpu consumption under light load is higher.
+ for (const TCpuAllocation& cpuAlloc : allocation.Items) {
+ TCpu& cpu = Cpus[cpuAlloc.CpuId];
+ if (cpu.AllowedPools.size() == 1 && cpu.AllowedPools[0] == &pool) {
+ pool.WakeOrderCpus.emplace_back(&cpu);
+ }
+ }
+ for (const TCpuAllocation& cpuAlloc : allocation.Items) {
+ TCpu& cpu = Cpus[cpuAlloc.CpuId];
+ if (cpu.AllowedPools.size() > 1 && cpuAlloc.HasPool(pool.PoolId)) {
+ pool.WakeOrderCpus.emplace_back(&cpu);
+ }
+ }
+ }
+ }
+
+ TUnitedWorkers::~TUnitedWorkers() {
+ }
+
+ void TUnitedWorkers::Prepare(TActorSystem* actorSystem, TVector<NSchedulerQueue::TReader*>& scheduleReaders) {
+ // Setup allocated cpus
+ // NOTE: leave gaps for not allocated cpus (default-initialized)
+ TWorkerId workers = 0;
+ for (TCpuId cpuId = 0; cpuId < CpuCount; cpuId++) {
+ TCpu& cpu = Cpus[cpuId];
+
+ // Setup cpu-local workers
+ if (cpu.LocalManager) {
+ for (size_t i = 0; i < cpu.LocalManager->WorkerCount(); i++) {
+ TWorkerId workerId = workers++;
+ cpu.LocalManager->AddWorker(workerId);
+
+ // Setup worker
+ Y_VERIFY(workerId < WorkerCount);
+ Workers[workerId].Thread.Reset(new TExecutorThread(
+ workerId,
+ cpu.CpuId,
+ actorSystem,
+ cpu.ExecutorPool.Get(), // use cpu-local manager as proxy executor for all workers on cpu
+ nullptr, // MailboxTable is pool-specific, will be set on pool switch
+ cpu.ExecutorPool->GetName()));
+ // NOTE: TWorker::ThreadId will be initialized after in Start()
+
+ scheduleReaders.push_back(&Workers[workerId].SchedulerQueue.Reader);
+ }
+ }
+ }
+ }
+
+ void TUnitedWorkers::Start() {
+ for (TWorkerId workerId = 0; workerId < WorkerCount; workerId++) {
+ Workers[workerId].Thread->Start();
+ }
+ for (TWorkerId workerId = 0; workerId < WorkerCount; workerId++) {
+ AtomicStore(&Workers[workerId].ThreadId, Workers[workerId].Thread->GetThreadId());
+ }
+ }
+
+ inline TThreadId TUnitedWorkers::GetWorkerThreadId(TWorkerId workerId) const {
+ volatile TThreadId* threadId = &Workers[workerId].ThreadId;
+#ifdef _linux_
+ while (AtomicLoad(threadId) == UnknownThreadId) {
+ NanoSleep(1000);
+ }
+#endif
+ return AtomicLoad(threadId);
+ }
+
+ inline NSchedulerQueue::TWriter* TUnitedWorkers::GetScheduleWriter(TWorkerId workerId) const {
+ return &Workers[workerId].SchedulerQueue.Writer;
+ }
+
+ void TUnitedWorkers::SetupPool(TPoolId pool, IExecutorPool* executorPool, TMailboxTable* mailboxTable) {
+ Pools[pool].ExecutorPool = executorPool;
+ Pools[pool].MailboxTable = mailboxTable;
+ }
+
+ void TUnitedWorkers::PrepareStop() {
+ AtomicStore(&StopFlag, true);
+ for (TPoolId pool = 0; pool < PoolCount; pool++) {
+ Pools[pool].Stop();
+ }
+ for (TCpuId cpuId = 0; cpuId < CpuCount; cpuId++) {
+ Cpus[cpuId].Stop();
+ }
+ }
+
+ void TUnitedWorkers::Shutdown() {
+ for (TWorkerId workerId = 0; workerId < WorkerCount; workerId++) {
+ Workers[workerId].Thread->Join();
+ }
+ }
+
+ inline void TUnitedWorkers::PushActivation(TPoolId pool, ui32 activation, ui64 revolvingCounter) {
+ if (Pools[pool].PushActivation(activation, revolvingCounter)) { // token generated
+ TryWake(pool);
+ }
+ }
+
+ inline bool TUnitedWorkers::TryAcquireToken(TPoolId pool) {
+ return Pools[pool].TryAcquireToken();
+ }
+
+ inline void TUnitedWorkers::TryWake(TPoolId pool) {
+ // Avoid using multiple atomic seq_cst loads in the loop: use a barrier once
+ AtomicBarrier();
+
+ // Scan every allowed cpu in pool's wakeup order and try to wake the first idle cpu
+ if (RelaxedLoad(&Pools[pool].Waiters) > 0) {
+ for (TCpu* cpu : Pools[pool].WakeOrderCpus) {
+ if (cpu->State.WakeWithoutToken(pool) == TCpuState::Woken) {
+ return; // successful wake up
+ }
+ }
+ }
+
+ // Cpu has not been woken up
+ }
+
+ inline void TUnitedWorkers::BeginExecution(TPoolId pool, ui32& activation, ui64 revolvingCounter) {
+ Pools[pool].BeginExecution(activation, revolvingCounter);
+ }
+
+ inline bool TUnitedWorkers::NextExecution(TPoolId pool, ui32& activation, ui64 revolvingCounter) {
+ return Pools[pool].NextExecution(activation, revolvingCounter);
+ }
+
+ inline void TUnitedWorkers::StopExecution(TPoolId pool) {
+ if (Pools[pool].StopExecution()) { // pending token
+ TryWake(pool);
+ }
+ }
+
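+ // Feeds per-pool cpu/idle usage to the balancer; TryLock both rate-limits
+ // balancing and ensures at most one cpu runs the balancer at a time.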
+ inline void TUnitedWorkers::Balance() {
+ ui64 ts = GetCycleCountFast();
+ if (Balancer->TryLock(ts)) {
+ for (TPoolId pool = 0; pool < PoolCount; pool++) {
+ if (Pools[pool].IsUnited()) {
+ ui64 ElapsedTs = 0;
+ ui64 ParkedTs = 0;
+ for (TCpu* cpu : Pools[pool].WakeOrderCpus) {
+ const TExecutorThreadStats& cpuStats = cpu->PoolStats[pool];
+ ElapsedTs += cpuStats.ElapsedTicks;
+ ParkedTs += cpuStats.ParkedTicks;
+ }
+ TBalancerStats stats;
+ stats.Ts = ts;
+ stats.CpuUs = Ts2Us(ElapsedTs);
+ stats.IdleUs = Ts2Us(ParkedTs);
+ Balancer->SetPoolStats(pool, stats);
+ }
+ }
+ Balancer->Balance();
+ Balancer->Unlock();
+ }
+ }
+
+ inline TPoolId TUnitedWorkers::AssignedPool(TWorkerContext& wctx) {
+ return Cpus[wctx.CpuId].State.AssignedPool();
+ }
+
+ inline bool TUnitedWorkers::IsPoolReassigned(TWorkerContext& wctx) {
+ return Cpus[wctx.CpuId].State.IsPoolReassigned(wctx.PoolId);
+ }
+
+ inline void TUnitedWorkers::SwitchPool(TWorkerContext& wctx, ui64 softDeadlineTs) {
+ Pools[wctx.PoolId].Switch(wctx, softDeadlineTs, Cpus[wctx.CpuId].PoolStats[wctx.PoolId]);
+ Cpus[wctx.CpuId].SwitchPool(wctx.PoolId);
+ }
+
+ TPoolId TUnitedWorkers::Idle(TPoolId assigned, TWorkerContext& wctx) {
+ wctx.SwitchToIdle();
+
+ TPoolId result;
+ TTimeTracker timeTracker;
+ TCpu& cpu = Cpus[wctx.CpuId];
+ TPool* assignedPool = assigned == CpuShared ? nullptr : &Pools[assigned];
+ TCpu::TScopedWaiters scopedWaiters(cpu, assignedPool);
+ while (true) {
+ if (cpu.StartSpinning(this, assignedPool, result)) {
+ break; // token already acquired (or stop)
+ }
+ result = WaitSequence(cpu, wctx, timeTracker);
+ if (Y_UNLIKELY(result == CpuStopped) || TryAcquireToken(result)) {
+ break; // token acquired (or stop)
+ }
+ }
+
+ wctx.AddElapsedCycles(IActor::ACTOR_SYSTEM, timeTracker.Elapsed());
+ return result;
+ }
+
+ TPoolId TUnitedWorkers::WaitSequence(TCpu& cpu, TWorkerContext& wctx, TTimeTracker& timeTracker) {
+ TPoolId result;
+ if (cpu.ActiveWait(Us2Ts(Config.SpinThresholdUs), result)) {
+ wctx.AddElapsedCycles(IActor::ACTOR_SYSTEM, timeTracker.Elapsed());
+ return result;
+ }
+ if (cpu.StartBlocking(result)) {
+ wctx.AddElapsedCycles(IActor::ACTOR_SYSTEM, timeTracker.Elapsed());
+ return result;
+ }
+ wctx.AddElapsedCycles(IActor::ACTOR_SYSTEM, timeTracker.Elapsed());
+ bool wakeup;
+ do {
+ wakeup = cpu.BlockedWait(result, Config.Balancer.PeriodUs * 1000);
+ wctx.AddParkedCycles(timeTracker.Elapsed());
+ } while (!wakeup);
+ return result;
+ }
+
+ void TUnitedWorkers::GetCurrentStats(TPoolId pool, TVector<TExecutorThreadStats>& statsCopy) const {
+ size_t idx = 1;
+ statsCopy.resize(idx + Pools[pool].WakeOrderCpus.size());
+ for (TCpu* cpu : Pools[pool].WakeOrderCpus) {
+ TExecutorThreadStats& s = statsCopy[idx++];
+ s = TExecutorThreadStats();
+ s.Aggregate(cpu->PoolStats[pool]);
+ }
+ }
+
+ TUnitedExecutorPool::TUnitedExecutorPool(const TUnitedExecutorPoolConfig& cfg, TUnitedWorkers* united)
+ : TExecutorPoolBaseMailboxed(cfg.PoolId, cfg.MaxActivityType)
+ , United(united)
+ , PoolName(cfg.PoolName)
+ {
+ United->SetupPool(TPoolId(cfg.PoolId), this, MailboxTable.Get());
+ }
+
+ void TUnitedExecutorPool::Prepare(TActorSystem* actorSystem, NSchedulerQueue::TReader** scheduleReaders, ui32* scheduleSz) {
+ ActorSystem = actorSystem;
+
+ // Schedule readers are initialized through TUnitedWorkers::Prepare
+ *scheduleReaders = nullptr;
+ *scheduleSz = 0;
+ }
+
+ void TUnitedExecutorPool::Start() {
+ // workers are actually started in TUnitedWorkers::Start()
+ }
+
+ void TUnitedExecutorPool::PrepareStop() {
+ }
+
+ void TUnitedExecutorPool::Shutdown() {
+ // workers are actually joined in TUnitedWorkers::Shutdown()
+ }
+
+ TAffinity* TUnitedExecutorPool::Affinity() const {
+ Y_FAIL(); // should never be called, TCpuExecutorPool is used instead
+ }
+
+ ui32 TUnitedExecutorPool::GetThreads() const {
+ return 0;
+ }
+
+ ui32 TUnitedExecutorPool::GetReadyActivation(TWorkerContext&, ui64) {
+ Y_FAIL(); // should never be called, TCpu*ExecutorPool is used instead
+ }
+
+ inline void TUnitedExecutorPool::ScheduleActivation(ui32 activation) {
+ TUnitedExecutorPool::ScheduleActivationEx(activation, AtomicIncrement(ActivationsRevolvingCounter));
+ }
+
+ inline void TUnitedExecutorPool::ScheduleActivationEx(ui32 activation, ui64 revolvingCounter) {
+ United->PushActivation(PoolId, activation, revolvingCounter);
+ }
+
+ void TUnitedExecutorPool::Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) {
+ TUnitedExecutorPool::Schedule(deadline - ActorSystem->Timestamp(), ev, cookie, workerId);
+ }
+
+ void TUnitedExecutorPool::Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) {
+ Y_VERIFY_DEBUG(workerId < United->GetWorkerCount());
+ const auto current = ActorSystem->Monotonic();
+ if (deadline < current) {
+ deadline = current;
+ }
+ United->GetScheduleWriter(workerId)->Push(deadline.MicroSeconds(), ev.Release(), cookie);
+ }
+
+ void TUnitedExecutorPool::Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) {
+ Y_VERIFY_DEBUG(workerId < United->GetWorkerCount());
+ const auto deadline = ActorSystem->Monotonic() + delta;
+ United->GetScheduleWriter(workerId)->Push(deadline.MicroSeconds(), ev.Release(), cookie);
+ }
+
+ void TUnitedExecutorPool::GetCurrentStats(TExecutorPoolStats& poolStats, TVector<TExecutorThreadStats>& statsCopy) const {
+ Y_UNUSED(poolStats);
+ if (statsCopy.empty()) {
+ statsCopy.resize(1);
+ }
+ statsCopy[0] = TExecutorThreadStats();
+ statsCopy[0].Aggregate(Stats);
+ United->GetCurrentStats(PoolId, statsCopy);
+ }
+}
diff --git a/library/cpp/actors/core/executor_pool_united.h b/library/cpp/actors/core/executor_pool_united.h
new file mode 100644
index 0000000000..a090ba2466
--- /dev/null
+++ b/library/cpp/actors/core/executor_pool_united.h
@@ -0,0 +1,135 @@
+#pragma once
+
+#include "actorsystem.h"
+#include "balancer.h"
+#include "scheduler_queue.h"
+#include "executor_pool_base.h"
+
+#include <library/cpp/actors/util/unordered_cache.h>
+
+#include <library/cpp/monlib/dynamic_counters/counters.h>
+#include <library/cpp/containers/stack_vector/stack_vec.h>
+
+#include <util/generic/noncopyable.h>
+
+namespace NActors {
+ class TMailboxTable;
+
+ class TUnitedWorkers: public TNonCopyable {
+ struct TWorker;
+ struct TPool;
+ struct TCpu;
+
+ size_t WorkerCount;
+ TArrayHolder<TWorker> Workers; // indexed by WorkerId
+ size_t PoolCount;
+ TArrayHolder<TPool> Pools; // indexed by PoolId, so may include unused (not united) pools
+ size_t CpuCount;
+ TArrayHolder<TCpu> Cpus; // indexed by CpuId, so may include unallocated CPUs
+
+ IBalancer* Balancer; // external pool cpu balancer
+
+ TUnitedWorkersConfig Config;
+ TCpuAllocationConfig Allocation;
+
+ volatile bool StopFlag = false;
+
+ public:
+ TUnitedWorkers(
+ const TUnitedWorkersConfig& config,
+ const TVector<TUnitedExecutorPoolConfig>& unitedPools,
+ const TCpuAllocationConfig& allocation,
+ IBalancer* balancer);
+ ~TUnitedWorkers();
+ void Prepare(TActorSystem* actorSystem, TVector<NSchedulerQueue::TReader*>& scheduleReaders);
+ void Start();
+ void PrepareStop();
+ void Shutdown();
+
+ bool IsStopped() const {
+ return RelaxedLoad(&StopFlag);
+ }
+
+ TWorkerId GetWorkerCount() const {
+ return WorkerCount;
+ }
+
+ // Returns thread id of a worker
+ TThreadId GetWorkerThreadId(TWorkerId workerId) const;
+
+ // Returns the schedule writer of the given worker
+ NSchedulerQueue::TWriter* GetScheduleWriter(TWorkerId workerId) const;
+
+ // Sets executor for specified pool
+ void SetupPool(TPoolId pool, IExecutorPool* executorPool, TMailboxTable* mailboxTable);
+
+ // Add activation of newly scheduled mailbox and wake cpu to execute it if required
+ void PushActivation(TPoolId pool, ui32 activation, ui64 revolvingCounter);
+
+ // Try acquire pending token. Must be done before execution
+ bool TryAcquireToken(TPoolId pool);
+
+ // Try to wake idle cpu waiting for tokens on specified pool
+ void TryWake(TPoolId pool);
+
+ // Get activation from pool; requires pool's token
+ void BeginExecution(TPoolId pool, ui32& activation, ui64 revolvingCounter);
+
+ // Stop currently active execution and start new one if token is available
+ // NOTE: Reuses token if it's not destroyed
+ bool NextExecution(TPoolId pool, ui32& activation, ui64 revolvingCounter);
+
+ // Stop active execution
+ void StopExecution(TPoolId pool);
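+
+ // Token protocol (a sketch): PushActivation may generate a token for the pool;
+ // a worker must obtain one (TryAcquireToken, or implicitly via Idle) before
+ // BeginExecution; NextExecution reuses it, and StopExecution releases it,
+ // waking another cpu if work is still pending.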
+
+ // Runs balancer to assign pools to cpus
+ void Balance();
+
+ // Returns pool to be executed by worker or `CpuShared`
+ TPoolId AssignedPool(TWorkerContext& wctx);
+
+ // Checks if balancer has assigned another pool for worker's cpu
+ bool IsPoolReassigned(TWorkerContext& wctx);
+
+ // Switch worker context into specified pool
+ void SwitchPool(TWorkerContext& wctx, ui64 softDeadlineTs);
+
+ // Wait for tokens from any pool allowed on specified cpu
+ TPoolId Idle(TPoolId assigned, TWorkerContext& wctx);
+
+ // Fill stats for specified pool
+ void GetCurrentStats(TPoolId pool, TVector<TExecutorThreadStats>& statsCopy) const;
+
+ private:
+ TPoolId WaitSequence(TCpu& cpu, TWorkerContext& wctx, TTimeTracker& timeTracker);
+ };
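+
+ // Typical lifecycle (a sketch; in this library the wiring is normally done by
+ // the cpu manager and actor system setup, not by user code):
+ //   TUnitedWorkers united(config, unitedPools, allocation, balancer);
+ //   united.Prepare(actorSystem, scheduleReaders); // create worker threads
+ //   united.Start();        // launch workers
+ //   ...
+ //   united.PrepareStop();  // raise StopFlag, stop pools and cpus
+ //   united.Shutdown();     // join worker threads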
+
+ class TUnitedExecutorPool: public TExecutorPoolBaseMailboxed {
+ TUnitedWorkers* United;
+ const TString PoolName;
+ TAtomic ActivationsRevolvingCounter = 0;
+ public:
+ TUnitedExecutorPool(const TUnitedExecutorPoolConfig& cfg, TUnitedWorkers* united);
+
+ void Prepare(TActorSystem* actorSystem, NSchedulerQueue::TReader** scheduleReaders, ui32* scheduleSz) override;
+ void Start() override;
+ void PrepareStop() override;
+ void Shutdown() override;
+
+ TAffinity* Affinity() const override;
+ ui32 GetThreads() const override;
+ ui32 GetReadyActivation(TWorkerContext& wctx, ui64 revolvingReadCounter) override;
+ void ScheduleActivation(ui32 activation) override;
+ void ScheduleActivationEx(ui32 activation, ui64 revolvingWriteCounter) override;
+ void Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) override;
+ void Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) override;
+ void Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) override;
+
+ void GetCurrentStats(TExecutorPoolStats& poolStats, TVector<TExecutorThreadStats>& statsCopy) const override;
+
+ TString GetName() const override {
+ return PoolName;
+ }
+ };
+}
diff --git a/library/cpp/actors/core/executor_pool_united_ut.cpp b/library/cpp/actors/core/executor_pool_united_ut.cpp
new file mode 100644
index 0000000000..d4df17f1b8
--- /dev/null
+++ b/library/cpp/actors/core/executor_pool_united_ut.cpp
@@ -0,0 +1,338 @@
+#include "actorsystem.h"
+#include "executor_pool_basic.h"
+#include "hfunc.h"
+#include "scheduler_basic.h"
+
+#include <library/cpp/actors/util/should_continue.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+#include <library/cpp/actors/protos/unittests.pb.h>
+
+using namespace NActors;
+
+////////////////////////////////////////////////////////////////////////////////
+
+struct TEvMsg : public NActors::TEventBase<TEvMsg, 10347> {
+ DEFINE_SIMPLE_LOCAL_EVENT(TEvMsg, "ExecutorPoolTest: Msg");
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
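+// Burns roughly workUs microseconds of this thread's CPU time: the inner loop
+// spins on the cycle counter to avoid frequent ThreadCPUTime() calls, while the
+// outer loop re-checks thread CPU time so preempted wall-clock time is not counted.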
+inline ui64 DoTimedWork(ui64 workUs) {
+ ui64 startUs = ThreadCPUTime();
+ ui64 endUs = startUs + workUs;
+ ui64 nowUs = startUs;
+ do {
+ ui64 endTs = GetCycleCountFast() + Us2Ts(endUs - nowUs);
+ while (GetCycleCountFast() <= endTs) {}
+ nowUs = ThreadCPUTime();
+ } while (nowUs <= endUs);
+ return nowUs - startUs;
+}
+
+class TTestSenderActor : public IActor {
+private:
+ using EActivityType = IActor::EActivityType;
+ using EActorActivity = IActor::EActorActivity;
+
+private:
+ TAtomic Counter;
+ TActorId Receiver;
+
+ std::function<void(void)> Action;
+
+public:
+ TTestSenderActor(std::function<void(void)> action = [](){},
+ EActivityType activityType = EActorActivity::OTHER)
+ : IActor(static_cast<TReceiveFunc>(&TTestSenderActor::Execute), activityType)
+ , Action(action)
+ {}
+
+ void Start(TActorId receiver, size_t count) {
+ AtomicSet(Counter, count);
+ Receiver = receiver;
+ }
+
+ void Stop() {
+ while (true) {
+ if (GetCounter() == 0) {
+ break;
+ }
+
+ Sleep(TDuration::MilliSeconds(1));
+ }
+ }
+
+ size_t GetCounter() const {
+ return AtomicGet(Counter);
+ }
+
+private:
+ STFUNC(Execute) {
+ Y_UNUSED(ctx);
+ switch (ev->GetTypeRewrite()) {
+ hFunc(TEvMsg, Handle);
+ }
+ }
+
+ void Handle(TEvMsg::TPtr &ev) {
+ Y_UNUSED(ev);
+ Action();
+ TAtomicBase count = AtomicDecrement(Counter);
+ Y_VERIFY(count != Max<TAtomicBase>());
+ if (count) {
+ Send(Receiver, new TEvMsg());
+ }
+ }
+};
+
+// Single-cpu balancer that rotates the assigned pool on every Balance() call; not thread-safe
+struct TRoundRobinBalancer: public IBalancer {
+ TCpuState* State;
+ TMap<TPoolId, TPoolId> NextPool;
+
+ bool AddCpu(const TCpuAllocation& cpuAlloc, TCpuState* cpu) override {
+ State = cpu;
+ TPoolId prev = cpuAlloc.AllowedPools.rbegin()->PoolId;
+ for (auto& p : cpuAlloc.AllowedPools) {
+ NextPool[prev] = p.PoolId;
+ prev = p.PoolId;
+ }
+ return true;
+ }
+
+ bool TryLock(ui64) override { return true; }
+ void SetPoolStats(TPoolId, const TBalancerStats&) override {}
+ void Unlock() override {}
+
+ void Balance() override {
+ TPoolId assigned;
+ TPoolId current;
+ State->Load(assigned, current);
+ State->AssignPool(NextPool[assigned]);
+ }
+};
+
+void AddUnitedPool(THolder<TActorSystemSetup>& setup, ui32 concurrency = 0) {
+ TUnitedExecutorPoolConfig united;
+ united.PoolId = setup->GetExecutorsCount();
+ united.Concurrency = concurrency;
+ setup->CpuManager.United.emplace_back(std::move(united));
+}
+
+THolder<TActorSystemSetup> GetActorSystemSetup(ui32 cpuCount) {
+ auto setup = MakeHolder<NActors::TActorSystemSetup>();
+ setup->NodeId = 1;
+ setup->CpuManager.UnitedWorkers.CpuCount = cpuCount;
+ setup->CpuManager.UnitedWorkers.NoRealtime = true; // unavailable in test environment
+ setup->Scheduler = new TBasicSchedulerThread(NActors::TSchedulerConfig(512, 0));
+ return setup;
+}
+
+Y_UNIT_TEST_SUITE(UnitedExecutorPool) {
+
+#ifdef _linux_
+
+ Y_UNIT_TEST(OnePoolManyCpus) {
+ const size_t msgCount = 1e4;
+ auto setup = GetActorSystemSetup(4);
+ AddUnitedPool(setup);
+ TActorSystem actorSystem(setup);
+ actorSystem.Start();
+
+ auto begin = TInstant::Now();
+
+ auto actor = new TTestSenderActor();
+ auto actorId = actorSystem.Register(actor);
+ actor->Start(actor->SelfId(), msgCount);
+ actorSystem.Send(actorId, new TEvMsg());
+
+ while (actor->GetCounter()) {
+ auto now = TInstant::Now();
+ UNIT_ASSERT_C(now - begin < TDuration::Seconds(5), "Counter is " << actor->GetCounter());
+
+ Sleep(TDuration::MilliSeconds(1));
+ }
+
+ TVector<TExecutorThreadStats> stats;
+ TExecutorPoolStats poolStats;
+ actorSystem.GetPoolStats(0, poolStats, stats);
+ // Sum all per-thread counters into the 0th element
+ for (ui32 idx = 1; idx < stats.size(); ++idx) {
+ stats[0].Aggregate(stats[idx]);
+ }
+
+ UNIT_ASSERT_VALUES_EQUAL(stats[0].SentEvents, msgCount - 1);
+ UNIT_ASSERT_VALUES_EQUAL(stats[0].ReceivedEvents, msgCount);
+ //UNIT_ASSERT_VALUES_EQUAL(stats[0].PreemptedEvents, 0); // depends on execution time and system load, so may be non-zero
+ UNIT_ASSERT_VALUES_EQUAL(stats[0].NonDeliveredEvents, 0);
+ UNIT_ASSERT_VALUES_EQUAL(stats[0].EmptyMailboxActivation, 0);
+ //UNIT_ASSERT_VALUES_EQUAL(stats[0].CpuNs, 0); // depends on total duration of test, so undefined
+ UNIT_ASSERT(stats[0].ElapsedTicks > 0);
+ UNIT_ASSERT(stats[0].ParkedTicks == 0); // per-pool parked time does not make sense for united pools
+ UNIT_ASSERT_VALUES_EQUAL(stats[0].BlockedTicks, 0);
+ UNIT_ASSERT(stats[0].ActivationTimeHistogram.TotalSamples >= msgCount / TBasicExecutorPoolConfig::DEFAULT_EVENTS_PER_MAILBOX);
+ UNIT_ASSERT_VALUES_EQUAL(stats[0].EventDeliveryTimeHistogram.TotalSamples, msgCount);
+ UNIT_ASSERT_VALUES_EQUAL(stats[0].EventProcessingCountHistogram.TotalSamples, msgCount);
+ UNIT_ASSERT(stats[0].EventProcessingTimeHistogram.TotalSamples > 0);
+ UNIT_ASSERT(stats[0].ElapsedTicksByActivity[0] > 0);
+ UNIT_ASSERT_VALUES_EQUAL(stats[0].ReceivedEventsByActivity[0], msgCount);
+ UNIT_ASSERT_VALUES_EQUAL(stats[0].ActorsAliveByActivity[0], 1);
+ UNIT_ASSERT_VALUES_EQUAL(stats[0].ScheduledEventsByActivity[0], 0);
+ UNIT_ASSERT_VALUES_EQUAL(stats[0].PoolActorRegistrations, 1);
+ UNIT_ASSERT_VALUES_EQUAL(stats[0].PoolDestroyedActors, 0);
+ UNIT_ASSERT_VALUES_EQUAL(stats[0].PoolAllocatedMailboxes, 4095); // exactly one mailbox line was allocated
+ UNIT_ASSERT(stats[0].MailboxPushedOutByTime + stats[0].MailboxPushedOutByEventCount + stats[0].MailboxPushedOutBySoftPreemption >= msgCount / TBasicExecutorPoolConfig::DEFAULT_EVENTS_PER_MAILBOX);
+ }
+
+ Y_UNIT_TEST(ManyPoolsOneSharedCpu) {
+ const size_t msgCount = 1e4;
+ const size_t pools = 4;
+ auto setup = GetActorSystemSetup(1);
+ for (size_t pool = 0; pool < pools; pool++) {
+ AddUnitedPool(setup);
+ }
+ TActorSystem actorSystem(setup);
+ actorSystem.Start();
+
+ auto begin = TInstant::Now();
+
+ TVector<TTestSenderActor*> actors;
+ for (size_t pool = 0; pool < pools; pool++) {
+ auto actor = new TTestSenderActor();
+ auto actorId = actorSystem.Register(actor, TMailboxType::HTSwap, pool);
+ actor->Start(actor->SelfId(), msgCount);
+ actorSystem.Send(actorId, new TEvMsg());
+ actors.push_back(actor);
+ }
+
+ while (true) {
+ size_t left = 0;
+ for (auto actor : actors) {
+ left += actor->GetCounter();
+ }
+ if (left == 0) {
+ break;
+ }
+ auto now = TInstant::Now();
+ UNIT_ASSERT_C(now - begin < TDuration::Seconds(5), "left " << left);
+ Sleep(TDuration::MilliSeconds(1));
+ }
+
+ for (size_t pool = 0; pool < pools; pool++) {
+ TVector<TExecutorThreadStats> stats;
+ TExecutorPoolStats poolStats;
+ actorSystem.GetPoolStats(pool, poolStats, stats);
+ // Sum all per-thread counters into the 0th element
+ for (ui32 idx = 1; idx < stats.size(); ++idx) {
+ stats[0].Aggregate(stats[idx]);
+ }
+
+ UNIT_ASSERT_VALUES_EQUAL(stats[0].ReceivedEvents, msgCount);
+ UNIT_ASSERT_VALUES_EQUAL(stats[0].PoolActorRegistrations, 1);
+ }
+ }
+
+ Y_UNIT_TEST(ManyPoolsOneAssignedCpu) {
+ const size_t msgCount = 1e4;
+ const size_t pools = 4;
+ auto setup = GetActorSystemSetup(1);
+ setup->Balancer.Reset(new TRoundRobinBalancer());
+ for (size_t pool = 0; pool < pools; pool++) {
+ AddUnitedPool(setup);
+ }
+ TActorSystem actorSystem(setup);
+ actorSystem.Start();
+
+ auto begin = TInstant::Now();
+
+ TVector<TTestSenderActor*> actors;
+ for (size_t pool = 0; pool < pools; pool++) {
+ auto actor = new TTestSenderActor();
+ auto actorId = actorSystem.Register(actor, TMailboxType::HTSwap, pool);
+ actor->Start(actor->SelfId(), msgCount);
+ actorSystem.Send(actorId, new TEvMsg());
+ actors.push_back(actor);
+ }
+
+ while (true) {
+ size_t left = 0;
+ for (auto actor : actors) {
+ left += actor->GetCounter();
+ }
+ if (left == 0) {
+ break;
+ }
+ auto now = TInstant::Now();
+ UNIT_ASSERT_C(now - begin < TDuration::Seconds(5), "left " << left);
+ Sleep(TDuration::MilliSeconds(1));
+ }
+
+ for (size_t pool = 0; pool < pools; pool++) {
+ TVector<TExecutorThreadStats> stats;
+ TExecutorPoolStats poolStats;
+ actorSystem.GetPoolStats(pool, poolStats, stats);
+ // Sum all per-thread counters into the 0th element
+ for (ui32 idx = 1; idx < stats.size(); ++idx) {
+ stats[0].Aggregate(stats[idx]);
+ }
+
+ UNIT_ASSERT_VALUES_EQUAL(stats[0].ReceivedEvents, msgCount);
+ UNIT_ASSERT_VALUES_EQUAL(stats[0].PoolActorRegistrations, 1);
+ }
+ }
+
+ Y_UNIT_TEST(ManyPoolsOneCpuSlowEvents) {
+ const size_t msgCount = 3;
+ const size_t pools = 4;
+ auto setup = GetActorSystemSetup(1);
+ for (size_t pool = 0; pool < pools; pool++) {
+ AddUnitedPool(setup);
+ }
+ TActorSystem actorSystem(setup);
+ actorSystem.Start();
+
+ auto begin = TInstant::Now();
+
+ TVector<TTestSenderActor*> actors;
+ for (size_t pool = 0; pool < pools; pool++) {
+ auto actor = new TTestSenderActor([]() {
+ DoTimedWork(100'000);
+ });
+ auto actorId = actorSystem.Register(actor, TMailboxType::HTSwap, pool);
+ actor->Start(actor->SelfId(), msgCount);
+ actorSystem.Send(actorId, new TEvMsg());
+ actors.push_back(actor);
+ }
+
+ while (true) {
+ size_t left = 0;
+ for (auto actor : actors) {
+ left += actor->GetCounter();
+ }
+ if (left == 0) {
+ break;
+ }
+ auto now = TInstant::Now();
+ UNIT_ASSERT_C(now - begin < TDuration::Seconds(15), "left " << left);
+ Sleep(TDuration::MilliSeconds(1));
+ }
+
+ for (size_t pool = 0; pool < pools; pool++) {
+ TVector<TExecutorThreadStats> stats;
+ TExecutorPoolStats poolStats;
+ actorSystem.GetPoolStats(pool, poolStats, stats);
+ // Sum all per-thread counters into the 0th element
+ for (ui32 idx = 1; idx < stats.size(); ++idx) {
+ stats[0].Aggregate(stats[idx]);
+ }
+
+ UNIT_ASSERT_VALUES_EQUAL(stats[0].ReceivedEvents, msgCount);
+ UNIT_ASSERT_VALUES_EQUAL(stats[0].PreemptedEvents, msgCount); // every 100ms event should be preempted
+ UNIT_ASSERT_VALUES_EQUAL(stats[0].PoolActorRegistrations, 1);
+ }
+ }
+
+#endif
+
+}
diff --git a/library/cpp/actors/core/executor_thread.cpp b/library/cpp/actors/core/executor_thread.cpp
new file mode 100644
index 0000000000..446b651efd
--- /dev/null
+++ b/library/cpp/actors/core/executor_thread.cpp
@@ -0,0 +1,563 @@
+#include "executor_thread.h"
+#include "actorsystem.h"
+#include "callstack.h"
+#include "mailbox.h"
+#include "event.h"
+#include "events.h"
+
+#include <library/cpp/actors/prof/tag.h>
+#include <library/cpp/actors/util/affinity.h>
+#include <library/cpp/actors/util/datetime.h>
+#include <library/cpp/actors/util/thread.h>
+
+#ifdef BALLOC
+#include <library/cpp/balloc/optional/operators.h>
+#endif
+
+#ifdef _linux_
+#include <sys/syscall.h>
+#include <unistd.h>
+#endif
+
+#include <util/system/type_name.h>
+#include <util/system/datetime.h>
+
+LWTRACE_USING(ACTORLIB_PROVIDER)
+
+namespace NActors {
+ constexpr TDuration TExecutorThread::DEFAULT_TIME_PER_MAILBOX;
+
+ TExecutorThread::TExecutorThread(
+ TWorkerId workerId,
+ TWorkerId cpuId,
+ TActorSystem* actorSystem,
+ IExecutorPool* executorPool,
+ TMailboxTable* mailboxTable,
+ const TString& threadName,
+ TDuration timePerMailbox,
+ ui32 eventsPerMailbox)
+ : ActorSystem(actorSystem)
+ , ExecutorPool(executorPool)
+ , Ctx(workerId, cpuId, actorSystem ? actorSystem->GetMaxActivityType() : 1)
+ , ThreadName(threadName)
+ {
+ Ctx.Switch(
+ ExecutorPool,
+ mailboxTable,
+ NHPTimer::GetClockRate() * timePerMailbox.SecondsFloat(),
+ eventsPerMailbox,
+ ui64(-1), // infinite soft deadline
+ &Ctx.WorkerStats);
+ }
+
+ TActorId TExecutorThread::RegisterActor(IActor* actor, TMailboxType::EType mailboxType, ui32 poolId, const TActorId& parentId) {
+ if (poolId == Max<ui32>())
+ return Ctx.Executor->Register(actor, mailboxType, ++RevolvingWriteCounter, parentId ? parentId : CurrentRecipient);
+ else
+ return ActorSystem->Register(actor, mailboxType, poolId, ++RevolvingWriteCounter, parentId ? parentId : CurrentRecipient);
+ }
+
+ TActorId TExecutorThread::RegisterActor(IActor* actor, TMailboxHeader* mailbox, ui32 hint, const TActorId& parentId) {
+ return Ctx.Executor->Register(actor, mailbox, hint, parentId ? parentId : CurrentRecipient);
+ }
+
+ void TExecutorThread::UnregisterActor(TMailboxHeader* mailbox, ui64 localActorId) {
+ IActor* actor = mailbox->DetachActor(localActorId);
+ Ctx.DecrementActorsAliveByActivity(actor->GetActivityType());
+ DyingActors.push_back(THolder(actor));
+ }
+
+ void TExecutorThread::DropUnregistered() {
+ DyingActors.clear(); // here is actual destruction of actors
+ }
+
+ void TExecutorThread::Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie) {
+ ++CurrentActorScheduledEventsCounter;
+ Ctx.Executor->Schedule(deadline, ev, cookie, Ctx.WorkerId);
+ }
+
+ void TExecutorThread::Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie) {
+ ++CurrentActorScheduledEventsCounter;
+ Ctx.Executor->Schedule(deadline, ev, cookie, Ctx.WorkerId);
+ }
+
+ void TExecutorThread::Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie) {
+ ++CurrentActorScheduledEventsCounter;
+ Ctx.Executor->Schedule(delta, ev, cookie, Ctx.WorkerId);
+ }
+
+ template <class T>
+ inline TString SafeTypeName(T* t) {
+ if (t == nullptr) {
+ return "nullptr";
+ }
+ try {
+ return TypeName(*t);
+ } catch (...) {
+ return "unknown-type";
+ }
+ }
+
+ inline TString ActorTypeName(const IActor* actor, ui32 activityType) {
+ return actor ? SafeTypeName(actor) : ("activityType_" + ToString(activityType) + " (destroyed)");
+ }
+
+ inline void LwTraceSlowDelivery(IEventHandle* ev, const IActor* actor, ui32 poolId, const TActorId& currentRecipient,
+ double delivMs, double sinceActivationMs, ui32 eventsExecutedBefore) {
+ const auto baseEv = (ev && ev->HasEvent()) ? ev->GetBase() : nullptr;
+ LWPROBE(EventSlowDelivery,
+ poolId,
+ delivMs,
+ sinceActivationMs,
+ eventsExecutedBefore,
+ baseEv ? SafeTypeName(baseEv) : (ev ? ToString(ev->Type) : TString("nullptr")),
+ currentRecipient.ToString(),
+ SafeTypeName(actor));
+ }
+
+ inline void LwTraceSlowEvent(IEventHandle* ev, ui32 evTypeForTracing, const IActor* actor, ui32 poolId, ui32 activityType,
+ const TActorId& currentRecipient, double eventMs) {
+ // Event could have been destroyed by actor->Receive();
+ const auto baseEv = (ev && ev->HasEvent()) ? ev->GetBase() : nullptr;
+ LWPROBE(SlowEvent,
+ poolId,
+ eventMs,
+ baseEv ? SafeTypeName(baseEv) : ToString(evTypeForTracing),
+ currentRecipient.ToString(),
+ ActorTypeName(actor, activityType));
+ }
+
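+ // Executes up to Ctx.EventsPerMailbox events from one mailbox; the loop below
+ // exits early on soft preemption (united pools), on the per-mailbox time limit,
+ // or when the queue becomes empty, and the mailbox is unlocked at the end.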
+ template <typename TMailbox>
+ void TExecutorThread::Execute(TMailbox* mailbox, ui32 hint) {
+ Y_VERIFY_DEBUG(DyingActors.empty());
+
+ bool reclaimAsFree = false;
+
+ NHPTimer::STime hpstart = GetCycleCountFast();
+ NHPTimer::STime hpprev = hpstart;
+
+ IActor* actor = nullptr;
+ ui32 prevActivityType = std::numeric_limits<ui32>::max();
+ TActorId recipient;
+ for (ui32 executed = 0; executed < Ctx.EventsPerMailbox; ++executed) {
+ TAutoPtr<IEventHandle> ev(mailbox->Pop());
+ if (!!ev) {
+ NHPTimer::STime hpnow;
+ recipient = ev->GetRecipientRewrite();
+ if (actor = mailbox->FindActor(recipient.LocalId())) {
+ TActorContext ctx(*mailbox, *this, hpprev, recipient);
+ TlsActivationContext = &ctx;
+
+#ifdef USE_ACTOR_CALLSTACK
+ TCallstack::GetTlsCallstack() = ev->Callstack;
+ TCallstack::GetTlsCallstack().SetLinesToSkip();
+#endif
+ CurrentRecipient = recipient;
+ CurrentActorScheduledEventsCounter = 0;
+
+ if (executed == 0) {
+ double usec = Ctx.AddActivationStats(AtomicLoad(&mailbox->ScheduleMoment), hpprev);
+ if (usec > 500) {
+ GLOBAL_LWPROBE(ACTORLIB_PROVIDER, SlowActivation, Ctx.PoolId, usec / 1000.0);
+ }
+ }
+
+ i64 usecDeliv = Ctx.AddEventDeliveryStats(ev->SendTime, hpprev);
+ if (usecDeliv > 5000) {
+ double sinceActivationMs = NHPTimer::GetSeconds(hpprev - hpstart) * 1000.0;
+ LwTraceSlowDelivery(ev.Get(), actor, Ctx.PoolId, CurrentRecipient, NHPTimer::GetSeconds(hpprev - ev->SendTime) * 1000.0, sinceActivationMs, executed);
+ }
+
+ ui32 evTypeForTracing = ev->Type;
+
+ ui32 activityType = actor->GetActivityType();
+ if (activityType != prevActivityType) {
+ prevActivityType = activityType;
+ NProfiling::TMemoryTagScope::Reset(ActorSystem->MemProfActivityBase + activityType);
+ }
+
+ actor->Receive(ev, ctx);
+
+ size_t dyingActorsCnt = DyingActors.size();
+ Ctx.UpdateActorsStats(dyingActorsCnt);
+ if (dyingActorsCnt) {
+ DropUnregistered();
+ actor = nullptr;
+ }
+
+ if (mailbox->IsEmpty()) // was not free and became free, we must reclaim the mailbox
+ reclaimAsFree = true;
+
+ hpnow = GetCycleCountFast();
+ NHPTimer::STime elapsed = Ctx.AddEventProcessingStats(hpprev, hpnow, activityType, CurrentActorScheduledEventsCounter);
+ if (elapsed > 1000000) {
+ LwTraceSlowEvent(ev.Get(), evTypeForTracing, actor, Ctx.PoolId, activityType, CurrentRecipient, NHPTimer::GetSeconds(elapsed) * 1000.0);
+ }
+
+ // The actor might have been destroyed
+ if (actor)
+ actor->AddElapsedTicks(elapsed);
+
+ CurrentRecipient = TActorId();
+ } else {
+ TAutoPtr<IEventHandle> nonDelivered = ev->ForwardOnNondelivery(TEvents::TEvUndelivered::ReasonActorUnknown);
+ if (nonDelivered.Get()) {
+ ActorSystem->Send(nonDelivered);
+ } else {
+ Ctx.IncrementNonDeliveredEvents();
+ }
+ hpnow = GetCycleCountFast();
+ }
+
+ hpprev = hpnow;
+
+ // Soft preemption in united pool
+ if (Ctx.SoftDeadlineTs < (ui64)hpnow) {
+ AtomicStore(&mailbox->ScheduleMoment, hpnow);
+ Ctx.IncrementMailboxPushedOutBySoftPreemption();
+ LWTRACK(MailboxPushedOutBySoftPreemption,
+ Ctx.Orbit,
+ Ctx.PoolId,
+ Ctx.Executor->GetName(),
+ executed + 1,
+ CyclesToDuration(hpnow - hpstart),
+ Ctx.WorkerId,
+ recipient.ToString(),
+ SafeTypeName(actor));
+ break;
+ }
+
+ // time limit inside one mailbox passed, let others do some work
+ if (hpnow - hpstart > (i64)Ctx.TimePerMailboxTs) {
+ AtomicStore(&mailbox->ScheduleMoment, hpnow);
+ Ctx.IncrementMailboxPushedOutByTime();
+ LWTRACK(MailboxPushedOutByTime,
+ Ctx.Orbit,
+ Ctx.PoolId,
+ Ctx.Executor->GetName(),
+ executed + 1,
+ CyclesToDuration(hpnow - hpstart),
+ Ctx.WorkerId,
+ recipient.ToString(),
+ SafeTypeName(actor));
+ break;
+ }
+
+ if (executed + 1 == Ctx.EventsPerMailbox) {
+ AtomicStore(&mailbox->ScheduleMoment, hpnow);
+ Ctx.IncrementMailboxPushedOutByEventCount();
+ LWTRACK(MailboxPushedOutByEventCount,
+ Ctx.Orbit,
+ Ctx.PoolId,
+ Ctx.Executor->GetName(),
+ executed + 1,
+ CyclesToDuration(hpnow - hpstart),
+ Ctx.WorkerId,
+ recipient.ToString(),
+ SafeTypeName(actor));
+ break;
+ }
+ } else {
+ if (executed == 0)
+ Ctx.IncrementEmptyMailboxActivation();
+ LWTRACK(MailboxEmpty,
+ Ctx.Orbit,
+ Ctx.PoolId,
+ Ctx.Executor->GetName(),
+ executed,
+ CyclesToDuration(GetCycleCountFast() - hpstart),
+ Ctx.WorkerId,
+ recipient.ToString(),
+ SafeTypeName(actor));
+ break; // empty queue, leave
+ }
+ }
+
+ NProfiling::TMemoryTagScope::Reset(0);
+ TlsActivationContext = nullptr;
+ UnlockFromExecution(mailbox, Ctx.Executor, reclaimAsFree, hint, Ctx.WorkerId, RevolvingWriteCounter);
+ }
+
+ TThreadId TExecutorThread::GetThreadId() const {
+#ifdef _linux_
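+ // Spin until ThreadProc() publishes the tid via AtomicSet; hence "must be called after Start()".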
+ while (AtomicLoad(&ThreadId) == UnknownThreadId) {
+ NanoSleep(1000);
+ }
+#endif
+ return ThreadId;
+ }
+
+ void* TExecutorThread::ThreadProc() {
+#ifdef _linux_
+ pid_t tid = syscall(SYS_gettid);
+ AtomicSet(ThreadId, (ui64)tid);
+#endif
+
+#ifdef BALLOC
+ ThreadDisableBalloc();
+#endif
+
+ if (ThreadName) {
+ ::SetCurrentThreadName(ThreadName);
+ }
+
+ ExecutorPool->SetRealTimeMode();
+ TAffinityGuard affinity(ExecutorPool->Affinity());
+
+ NHPTimer::STime hpnow = GetCycleCountFast();
+ NHPTimer::STime hpprev = hpnow;
+ ui64 execCount = 0;
+ ui64 readyActivationCount = 0;
+ i64 execCycles = 0;
+ i64 nonExecCycles = 0;
+
+ for (;;) {
+ if (ui32 activation = ExecutorPool->GetReadyActivation(Ctx, ++RevolvingReadCounter)) {
+ LWTRACK(ActivationBegin, Ctx.Orbit, Ctx.CpuId, Ctx.PoolId, Ctx.WorkerId, NHPTimer::GetSeconds(Ctx.Lease.GetPreciseExpireTs()) * 1e3);
+ readyActivationCount++;
+ if (TMailboxHeader* header = Ctx.MailboxTable->Get(activation)) {
+ if (header->LockForExecution()) {
+ hpnow = GetCycleCountFast();
+ nonExecCycles += hpnow - hpprev;
+ hpprev = hpnow;
+ switch (header->Type) {
+ case TMailboxType::Simple:
+ Execute(static_cast<TMailboxTable::TSimpleMailbox*>(header), activation);
+ break;
+ case TMailboxType::Revolving:
+ Execute(static_cast<TMailboxTable::TRevolvingMailbox*>(header), activation);
+ break;
+ case TMailboxType::HTSwap:
+ Execute(static_cast<TMailboxTable::THTSwapMailbox*>(header), activation);
+ break;
+ case TMailboxType::ReadAsFilled:
+ Execute(static_cast<TMailboxTable::TReadAsFilledMailbox*>(header), activation);
+ break;
+ case TMailboxType::TinyReadAsFilled:
+ Execute(static_cast<TMailboxTable::TTinyReadAsFilledMailbox*>(header), activation);
+ break;
+ }
+ hpnow = GetCycleCountFast();
+ execCycles += hpnow - hpprev;
+ hpprev = hpnow;
+ execCount++;
+ if (execCycles + nonExecCycles > 39000000) { // flush roughly every 15 ms at 2.6GHz; 1000 flushes span ~15 sec (the Solomon interval)
+ LWPROBE(ExecutorThreadStats, ExecutorPool->PoolId, ExecutorPool->GetName(), Ctx.WorkerId,
+ execCount, readyActivationCount,
+ NHPTimer::GetSeconds(execCycles) * 1000.0, NHPTimer::GetSeconds(nonExecCycles) * 1000.0);
+ execCount = 0;
+ readyActivationCount = 0;
+ execCycles = 0;
+ nonExecCycles = 0;
+ Ctx.UpdateThreadTime();
+ }
+ }
+ }
+ LWTRACK(ActivationEnd, Ctx.Orbit, Ctx.CpuId, Ctx.PoolId, Ctx.WorkerId);
+ Ctx.Orbit.Reset();
+ } else { // no activation means PrepareStop was called, so the thread must terminate
+ break;
+ }
+ }
+ return nullptr;
+ }
+
+ // There must be a barrier and a check-read followed by a CAS,
+ // or just a CAS without the read,
+ // or queue unlocks must be performed with an exchange rather than a plain write.
+ // TODO: check the performance of these options under contention
+
+ // placed here in the hope of better compiler optimization
+
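+ // A sketch of the execution-state machine implemented below (derived from the code; each Free* state mirrors
+ // its plain counterpart for mailboxes that currently sit on the free list):
+ //
+ //   Inactive --MarkForSchedule--> Scheduled --LockForExecution--> Executing
+ //   Executing --UnlockFromExecution1--> Leaving --UnlockFromExecution2--> Inactive (or Scheduled)
+ //   Leaving --MarkForSchedule--> LeavingMarked --UnlockFromExecution2--> Scheduled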
+ bool TMailboxHeader::MarkForSchedule() {
+ AtomicBarrier();
+ for (;;) {
+ const ui32 state = AtomicLoad(&ExecutionState);
+ switch (state) {
+ case TExecutionState::Inactive:
+ if (AtomicUi32Cas(&ExecutionState, TExecutionState::Scheduled, TExecutionState::Inactive))
+ return true;
+ break;
+ case TExecutionState::Scheduled:
+ return false;
+ case TExecutionState::Leaving:
+ if (AtomicUi32Cas(&ExecutionState, TExecutionState::LeavingMarked, TExecutionState::Leaving))
+ return true;
+ break;
+ case TExecutionState::Executing:
+ case TExecutionState::LeavingMarked:
+ return false;
+ case TExecutionState::Free:
+ if (AtomicUi32Cas(&ExecutionState, TExecutionState::FreeScheduled, TExecutionState::Free))
+ return true;
+ break;
+ case TExecutionState::FreeScheduled:
+ return false;
+ case TExecutionState::FreeLeaving:
+ if (AtomicUi32Cas(&ExecutionState, TExecutionState::FreeLeavingMarked, TExecutionState::FreeLeaving))
+ return true;
+ break;
+ case TExecutionState::FreeExecuting:
+ case TExecutionState::FreeLeavingMarked:
+ return false;
+ default:
+ Y_FAIL();
+ }
+ }
+ }
+
+ bool TMailboxHeader::LockForExecution() {
+ AtomicBarrier(); // strictly speaking, an AtomicBarrier is required here, but since mailboxes come from the queue, this barrier is already implied and could be removed
+ for (;;) {
+ const ui32 state = AtomicLoad(&ExecutionState);
+ switch (state) {
+ case TExecutionState::Inactive:
+ return false;
+ case TExecutionState::Scheduled:
+ if (AtomicUi32Cas(&ExecutionState, TExecutionState::Executing, TExecutionState::Scheduled))
+ return true;
+ break;
+ case TExecutionState::Leaving:
+ case TExecutionState::Executing:
+ case TExecutionState::LeavingMarked:
+ return false;
+ case TExecutionState::Free:
+ if (AtomicUi32Cas(&ExecutionState, TExecutionState::FreeExecuting, TExecutionState::Free))
+ return true;
+ break;
+ case TExecutionState::FreeScheduled:
+ if (AtomicUi32Cas(&ExecutionState, TExecutionState::FreeExecuting, TExecutionState::FreeScheduled))
+ return true;
+ break;
+ case TExecutionState::FreeLeaving:
+ case TExecutionState::FreeExecuting:
+ case TExecutionState::FreeLeavingMarked:
+ return false;
+ default:
+ Y_FAIL();
+ }
+ }
+ }
+
+ bool TMailboxHeader::LockFromFree() {
+ AtomicBarrier();
+ for (;;) {
+ const ui32 state = AtomicLoad(&ExecutionState);
+ switch (state) {
+ case TExecutionState::Inactive:
+ case TExecutionState::Scheduled:
+ case TExecutionState::Leaving:
+ case TExecutionState::Executing:
+ case TExecutionState::LeavingMarked:
+ Y_FAIL();
+ case TExecutionState::Free:
+ if (AtomicUi32Cas(&ExecutionState, TExecutionState::Executing, TExecutionState::Free))
+ return true;
+ break;
+ case TExecutionState::FreeScheduled:
+ if (AtomicUi32Cas(&ExecutionState, TExecutionState::Executing, TExecutionState::FreeScheduled))
+ return true;
+ break;
+ case TExecutionState::FreeLeaving:
+ case TExecutionState::FreeExecuting:
+ case TExecutionState::FreeLeavingMarked:
+ return false;
+ default:
+ Y_FAIL();
+ }
+ }
+ }
+
+ void TMailboxHeader::UnlockFromExecution1() {
+ const ui32 state = AtomicLoad(&ExecutionState);
+ if (state == TExecutionState::Executing)
+ AtomicStore(&ExecutionState, (ui32)TExecutionState::Leaving);
+ else if (state == TExecutionState::FreeExecuting)
+ AtomicStore(&ExecutionState, (ui32)TExecutionState::FreeLeaving);
+ else
+ Y_FAIL();
+ AtomicBarrier();
+ }
+
+ bool TMailboxHeader::UnlockFromExecution2(bool wouldReschedule) {
+ AtomicBarrier();
+ for (;;) {
+ const ui32 state = AtomicLoad(&ExecutionState);
+ switch (state) {
+ case TExecutionState::Inactive:
+ case TExecutionState::Scheduled:
+ Y_FAIL();
+ case TExecutionState::Leaving:
+ if (!wouldReschedule) {
+ if (AtomicUi32Cas(&ExecutionState, TExecutionState::Inactive, TExecutionState::Leaving))
+ return false;
+ } else {
+ if (AtomicUi32Cas(&ExecutionState, TExecutionState::Scheduled, TExecutionState::Leaving))
+ return true;
+ }
+ break;
+ case TExecutionState::Executing:
+ Y_FAIL();
+ case TExecutionState::LeavingMarked:
+ if (AtomicUi32Cas(&ExecutionState, TExecutionState::Scheduled, TExecutionState::LeavingMarked))
+ return true;
+ break;
+ case TExecutionState::Free:
+ case TExecutionState::FreeScheduled:
+ Y_FAIL();
+ case TExecutionState::FreeLeaving:
+ if (!wouldReschedule) {
+ if (AtomicUi32Cas(&ExecutionState, TExecutionState::Free, TExecutionState::FreeLeaving))
+ return false;
+ } else {
+ if (AtomicUi32Cas(&ExecutionState, TExecutionState::FreeScheduled, TExecutionState::FreeLeaving))
+ return true;
+ }
+ break;
+ case TExecutionState::FreeExecuting:
+ Y_FAIL();
+ case TExecutionState::FreeLeavingMarked:
+ if (AtomicUi32Cas(&ExecutionState, TExecutionState::FreeScheduled, TExecutionState::FreeLeavingMarked))
+ return true;
+ break;
+ default:
+ Y_FAIL();
+ }
+ }
+ }
+
+ bool TMailboxHeader::UnlockAsFree(bool wouldReschedule) {
+ AtomicBarrier();
+ for (;;) {
+ const ui32 state = AtomicLoad(&ExecutionState);
+ switch (state) {
+ case TExecutionState::Inactive:
+ case TExecutionState::Scheduled:
+ Y_FAIL();
+ case TExecutionState::Leaving:
+ if (!wouldReschedule) {
+ if (AtomicUi32Cas(&ExecutionState, TExecutionState::Free, TExecutionState::Leaving))
+ return false;
+ } else {
+ if (AtomicUi32Cas(&ExecutionState, TExecutionState::FreeScheduled, TExecutionState::Leaving))
+ return true;
+ }
+ break;
+ case TExecutionState::Executing:
+ Y_FAIL();
+ case TExecutionState::LeavingMarked:
+ if (AtomicUi32Cas(&ExecutionState, TExecutionState::FreeScheduled, TExecutionState::LeavingMarked))
+ return true;
+ break;
+ case TExecutionState::Free:
+ case TExecutionState::FreeScheduled:
+ case TExecutionState::FreeLeaving:
+ case TExecutionState::FreeExecuting:
+ case TExecutionState::FreeLeavingMarked:
+ Y_FAIL();
+ default:
+ Y_FAIL();
+ }
+ }
+ }
+}
diff --git a/library/cpp/actors/core/executor_thread.h b/library/cpp/actors/core/executor_thread.h
new file mode 100644
index 0000000000..9d3c573f0d
--- /dev/null
+++ b/library/cpp/actors/core/executor_thread.h
@@ -0,0 +1,112 @@
+#pragma once
+
+#include "defs.h"
+#include "event.h"
+#include "actor.h"
+#include "actorsystem.h"
+#include "callstack.h"
+#include "probes.h"
+#include "worker_context.h"
+
+#include <library/cpp/actors/util/datetime.h>
+
+#include <util/system/thread.h>
+
+namespace NActors {
+
+ class TExecutorThread: public ISimpleThread {
+ public:
+ static constexpr TDuration DEFAULT_TIME_PER_MAILBOX =
+ TDuration::MilliSeconds(10);
+ static constexpr ui32 DEFAULT_EVENTS_PER_MAILBOX = 100;
+
+ TExecutorThread(TWorkerId workerId,
+ TWorkerId cpuId,
+ TActorSystem* actorSystem,
+ IExecutorPool* executorPool,
+ TMailboxTable* mailboxTable,
+ const TString& threadName,
+ TDuration timePerMailbox = DEFAULT_TIME_PER_MAILBOX,
+ ui32 eventsPerMailbox = DEFAULT_EVENTS_PER_MAILBOX);
+
+ TExecutorThread(TWorkerId workerId,
+ TActorSystem* actorSystem,
+ IExecutorPool* executorPool,
+ TMailboxTable* mailboxTable,
+ const TString& threadName,
+ TDuration timePerMailbox = DEFAULT_TIME_PER_MAILBOX,
+ ui32 eventsPerMailbox = DEFAULT_EVENTS_PER_MAILBOX)
+ : TExecutorThread(workerId, 0, actorSystem, executorPool, mailboxTable, threadName, timePerMailbox, eventsPerMailbox)
+ {}
+
+ TActorId RegisterActor(IActor* actor, TMailboxType::EType mailboxType = TMailboxType::HTSwap, ui32 poolId = Max<ui32>(),
+ const TActorId& parentId = TActorId());
+ TActorId RegisterActor(IActor* actor, TMailboxHeader* mailbox, ui32 hint, const TActorId& parentId = TActorId());
+ void UnregisterActor(TMailboxHeader* mailbox, ui64 localActorId);
+ void DropUnregistered();
+ const std::vector<THolder<IActor>>& GetUnregistered() const { return DyingActors; }
+
+ void Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie = nullptr);
+ void Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie = nullptr);
+ void Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie = nullptr);
+
+ bool Send(TAutoPtr<IEventHandle> ev) {
+#ifdef USE_ACTOR_CALLSTACK
+ ev->Callstack = TCallstack::GetTlsCallstack();
+ ev->Callstack.Trace();
+#endif
+ Ctx.IncrementSentEvents();
+ return ActorSystem->Send(ev);
+ }
+
+ void GetCurrentStats(TExecutorThreadStats& statsCopy) const {
+ Ctx.GetCurrentStats(statsCopy);
+ }
+
+ TThreadId GetThreadId() const; // blocks, must be called after Start()
+ TWorkerId GetWorkerId() const { return Ctx.WorkerId; }
+
+ private:
+ void* ThreadProc();
+
+ template <typename TMailbox>
+ void Execute(TMailbox* mailbox, ui32 hint);
+
+ public:
+ TActorSystem* const ActorSystem;
+
+ private:
+ // Pool-specific
+ IExecutorPool* const ExecutorPool;
+
+ // Event-specific (currently executing)
+ TVector<THolder<IActor>> DyingActors;
+ TActorId CurrentRecipient;
+ ui64 CurrentActorScheduledEventsCounter = 0;
+
+ // Thread-specific
+ TWorkerContext Ctx;
+ ui64 RevolvingReadCounter = 0;
+ ui64 RevolvingWriteCounter = 0;
+ const TString ThreadName;
+ volatile TThreadId ThreadId = UnknownThreadId;
+ };
+
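+ // Finishes mailbox processing: performs the two-phase unlock, reschedules the mailbox if new events arrived
+ // in the meantime, and, on the free path, returns the mailbox to the executor pool.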
+ template <typename TMailbox>
+ void UnlockFromExecution(TMailbox* mailbox, IExecutorPool* executorPool, bool asFree, ui32 hint, TWorkerId workerId, ui64& revolvingWriteCounter) {
+ mailbox->UnlockFromExecution1();
+ const bool needReschedule1 = (nullptr != mailbox->Head());
+ if (!asFree) {
+ if (mailbox->UnlockFromExecution2(needReschedule1)) {
+ RelaxedStore<NHPTimer::STime>(&mailbox->ScheduleMoment, GetCycleCountFast());
+ executorPool->ScheduleActivationEx(hint, ++revolvingWriteCounter);
+ }
+ } else {
+ if (mailbox->UnlockAsFree(needReschedule1)) {
+ RelaxedStore<NHPTimer::STime>(&mailbox->ScheduleMoment, GetCycleCountFast());
+ executorPool->ScheduleActivationEx(hint, ++revolvingWriteCounter);
+ }
+ executorPool->ReclaimMailbox(TMailbox::MailboxType, hint, workerId, ++revolvingWriteCounter);
+ }
+ }
+}
diff --git a/library/cpp/actors/core/hfunc.h b/library/cpp/actors/core/hfunc.h
new file mode 100644
index 0000000000..26f3c65013
--- /dev/null
+++ b/library/cpp/actors/core/hfunc.h
@@ -0,0 +1,84 @@
+#pragma once
+
+#include "actor.h"
+#include "executor_thread.h"
+
+#include <util/system/defaults.h>
+
+#define HFunc(TEvType, HandleFunc) \
+ case TEvType::EventType: { \
+ typename TEvType::TPtr* x = reinterpret_cast<typename TEvType::TPtr*>(&ev); \
+ HandleFunc(*x, ctx); \
+ break; \
+ }
+
+#define hFunc(TEvType, HandleFunc) \
+ case TEvType::EventType: { \
+ typename TEvType::TPtr* x = reinterpret_cast<typename TEvType::TPtr*>(&ev); \
+ HandleFunc(*x); \
+ break; \
+ }
+
+#define HFuncTraced(TEvType, HandleFunc) \
+ case TEvType::EventType: { \
+ TRACE_EVENT_TYPE(Y_STRINGIZE(TEvType)); \
+ TEvType::TPtr* x = reinterpret_cast<TEvType::TPtr*>(&ev); \
+ HandleFunc(*x, ctx); \
+ break; \
+ }
+
+#define hFuncTraced(TEvType, HandleFunc) \
+ case TEvType::EventType: { \
+ TRACE_EVENT_TYPE(Y_STRINGIZE(TEvType)); \
+ typename TEvType::TPtr* x = reinterpret_cast<typename TEvType::TPtr*>(&ev); \
+ HandleFunc(*x); \
+ break; \
+ }
+
+#define HTemplFunc(TEvType, HandleFunc) \
+ case TEvType::EventType: { \
+ typename TEvType::TPtr* x = reinterpret_cast<typename TEvType::TPtr*>(&ev); \
+ HandleFunc(*x, ctx); \
+ break; \
+ }
+
+#define hTemplFunc(TEvType, HandleFunc) \
+ case TEvType::EventType: { \
+ typename TEvType::TPtr* x = reinterpret_cast<typename TEvType::TPtr*>(&ev); \
+ HandleFunc(*x); \
+ break; \
+ }
+
+#define SFunc(TEvType, HandleFunc) \
+ case TEvType::EventType: \
+ HandleFunc(ctx); \
+ break;
+
+#define sFunc(TEvType, HandleFunc) \
+ case TEvType::EventType: \
+ HandleFunc(); \
+ break;
+
+#define CFunc(TEventType, HandleFunc) \
+ case TEventType: \
+ HandleFunc(ctx); \
+ break;
+
+#define cFunc(TEventType, HandleFunc) \
+ case TEventType: \
+ HandleFunc(); \
+ break;
+
+#define FFunc(TEventType, HandleFunc) \
+ case TEventType: \
+ HandleFunc(ev, ctx); \
+ break;
+
+#define fFunc(TEventType, HandleFunc) \
+ case TEventType: \
+ HandleFunc(ev); \
+ break;
+
+#define IgnoreFunc(TEvType) \
+ case TEvType::EventType: \
+ break;
diff --git a/library/cpp/actors/core/interconnect.cpp b/library/cpp/actors/core/interconnect.cpp
new file mode 100644
index 0000000000..9fb33413b2
--- /dev/null
+++ b/library/cpp/actors/core/interconnect.cpp
@@ -0,0 +1,170 @@
+#include "interconnect.h"
+#include <util/digest/murmur.h>
+#include <google/protobuf/text_format.h>
+
+namespace NActors {
+
+ TNodeLocation::TNodeLocation(const NActorsInterconnect::TNodeLocation& location) {
+ const NProtoBuf::Descriptor *descriptor = NActorsInterconnect::TNodeLocation::descriptor();
+ const NActorsInterconnect::TNodeLocation *locp = &location;
+ NActorsInterconnect::TNodeLocation temp; // for legacy location case
+
+ // WalleConfig compatibility section
+ if (locp->HasBody()) {
+ if (locp == &location) {
+ temp.CopyFrom(*locp);
+ locp = &temp;
+ }
+ temp.SetUnit(::ToString(temp.GetBody()));
+ temp.ClearBody();
+ }
+
+ // legacy value processing
+ if (locp->HasDataCenterNum() || locp->HasRoomNum() || locp->HasRackNum() || locp->HasBodyNum()) {
+ if (locp == &location) {
+ temp.CopyFrom(*locp);
+ locp = &temp;
+ }
+ LegacyValue = TLegacyValue{temp.GetDataCenterNum(), temp.GetRoomNum(), temp.GetRackNum(), temp.GetBodyNum()};
+ temp.ClearDataCenterNum();
+ temp.ClearRoomNum();
+ temp.ClearRackNum();
+ temp.ClearBodyNum();
+
+ // the legacy format must not interfere with the new one
+ const NProtoBuf::Reflection *reflection = temp.GetReflection();
+ for (int i = 0, count = descriptor->field_count(); i < count; ++i) {
+ Y_VERIFY(!reflection->HasField(temp, descriptor->field(i)));
+ }
+
+ const auto& v = LegacyValue->DataCenter;
+ const char *p = reinterpret_cast<const char*>(&v);
+ temp.SetDataCenter(TString(p, strnlen(p, sizeof(ui32))));
+ temp.SetModule(::ToString(LegacyValue->Room));
+ temp.SetRack(::ToString(LegacyValue->Rack));
+ temp.SetUnit(::ToString(LegacyValue->Body));
+ }
+
+ auto makeString = [&] {
+ NProtoBuf::TextFormat::Printer p;
+ p.SetSingleLineMode(true);
+ TString s;
+ p.PrintToString(*locp, &s);
+ return s;
+ };
+
+ // modern format parsing
+ const NProtoBuf::Reflection *reflection = locp->GetReflection();
+ for (int i = 0, count = descriptor->field_count(); i < count; ++i) {
+ const NProtoBuf::FieldDescriptor *field = descriptor->field(i);
+ if (reflection->HasField(*locp, field)) {
+ Y_VERIFY(field->type() == NProtoBuf::FieldDescriptor::TYPE_STRING, "Location# %s", makeString().data());
+ Items.emplace_back(TKeys::E(field->number()), reflection->GetString(*locp, field));
+ }
+ }
+ const NProtoBuf::UnknownFieldSet& unknown = locp->unknown_fields();
+ for (int i = 0, count = unknown.field_count(); i < count; ++i) {
+ const NProtoBuf::UnknownField& field = unknown.field(i);
+ Y_VERIFY(field.type() == NProtoBuf::UnknownField::TYPE_LENGTH_DELIMITED, "Location# %s", makeString().data());
+ Items.emplace_back(TKeys::E(field.number()), field.length_delimited());
+ }
+ std::sort(Items.begin(), Items.end());
+ }
+
+ TNodeLocation::TNodeLocation(TFromSerialized, const TString& s)
+ : TNodeLocation(ParseLocation(s))
+ {}
+
+ NActorsInterconnect::TNodeLocation TNodeLocation::ParseLocation(const TString& s) {
+ NActorsInterconnect::TNodeLocation res;
+ const bool success = res.ParseFromString(s);
+ Y_VERIFY(success);
+ return res;
+ }
+
+ TString TNodeLocation::ToStringUpTo(TKeys::E upToKey) const {
+ const NProtoBuf::Descriptor *descriptor = NActorsInterconnect::TNodeLocation::descriptor();
+
+ TStringBuilder res;
+ for (const auto& [key, value] : Items) {
+ if (upToKey < key) {
+ break;
+ }
+ TString name;
+ if (const NProtoBuf::FieldDescriptor *field = descriptor->FindFieldByNumber(key)) {
+ name = field->options().GetExtension(NActorsInterconnect::PrintName);
+ } else {
+ name = ::ToString(int(key));
+ }
+ if (key != upToKey) {
+ res << name << "=" << value << "/";
+ } else {
+ res << value;
+ }
+ }
+ return res;
+ }
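+
+ // For illustration (hypothetical data and PrintName options): with Items = {DataCenter:"dc1", Module:"m7",
+ // Rack:"r42"} and print names "DC", "M", "R", ToStringUpTo(TKeys::Rack) yields "DC=dc1/M=m7/r42" --
+ // intermediate keys are printed as name=value pairs and the requested key as a bare value.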
+
+ void TNodeLocation::Serialize(NActorsInterconnect::TNodeLocation *pb) const {
+ const NProtoBuf::Descriptor *descriptor = NActorsInterconnect::TNodeLocation::descriptor();
+ const NProtoBuf::Reflection *reflection = pb->GetReflection();
+ NProtoBuf::UnknownFieldSet *unknown = pb->mutable_unknown_fields();
+ for (const auto& [key, value] : Items) {
+ if (const NProtoBuf::FieldDescriptor *field = descriptor->FindFieldByNumber(key)) {
+ reflection->SetString(pb, field, value);
+ } else {
+ unknown->AddLengthDelimited(key)->assign(value);
+ }
+ }
+ }
+
+ TString TNodeLocation::GetSerializedLocation() const {
+ NActorsInterconnect::TNodeLocation pb;
+ Serialize(&pb);
+ TString s;
+ const bool success = pb.SerializeToString(&s);
+ Y_VERIFY(success);
+ return s;
+ }
+
+ TNodeLocation::TLegacyValue TNodeLocation::GetLegacyValue() const {
+ if (LegacyValue) {
+ return *LegacyValue;
+ }
+
+ ui32 dataCenterId = 0, moduleId = 0, rackId = 0, unitId = 0;
+
+ for (const auto& [key, value] : Items) {
+ switch (key) {
+ case TKeys::DataCenter:
+ memcpy(&dataCenterId, value.data(), Min<size_t>(sizeof(dataCenterId), value.length()));
+ break;
+
+ case TKeys::Module: {
+ const bool success = TryFromString(value, moduleId);
+ Y_VERIFY(success);
+ break;
+ }
+
+ case TKeys::Rack:
+ // a hacky way to obtain a numeric id from a rack name
+ if (!TryFromString(value, rackId)) {
+ rackId = MurmurHash<ui32>(value.data(), value.length());
+ }
+ break;
+
+ case TKeys::Unit: {
+ const bool success = TryFromString(value, unitId);
+ Y_VERIFY(success);
+ break;
+ }
+
+ default:
+ Y_FAIL("unexpected legacy key# %d", key);
+ }
+ }
+
+ return {dataCenterId, moduleId, rackId, unitId};
+ }
+
+} // NActors
diff --git a/library/cpp/actors/core/interconnect.h b/library/cpp/actors/core/interconnect.h
new file mode 100644
index 0000000000..8d1cbd1e77
--- /dev/null
+++ b/library/cpp/actors/core/interconnect.h
@@ -0,0 +1,248 @@
+#pragma once
+
+#include "events.h"
+#include "event_local.h"
+#include <library/cpp/actors/protos/interconnect.pb.h>
+#include <util/string/cast.h>
+#include <util/string/builder.h>
+
+namespace NActors {
+ class TNodeLocation {
+ public:
+ struct TKeys {
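+ // Key numbers run from the coarsest location level (DataCenter) to the finest (Unit);
+ // the gaps presumably leave room for intermediate levels.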
+ enum E : int {
+ DataCenter = 10,
+ Module = 20,
+ Rack = 30,
+ Unit = 40,
+ };
+ };
+
+ struct TLegacyValue {
+ ui32 DataCenter;
+ ui32 Room;
+ ui32 Rack;
+ ui32 Body;
+
+ auto ConvertToTuple() const { return std::make_tuple(DataCenter, Room, Rack, Body); }
+
+ int Compare(const TLegacyValue& other) const {
+ const auto x = ConvertToTuple();
+ const auto y = other.ConvertToTuple();
+ if (x < y) {
+ return -1;
+ } else if (y < x) {
+ return 1;
+ } else {
+ return 0;
+ }
+ }
+
+ friend bool operator ==(const TLegacyValue& x, const TLegacyValue& y) { return x.Compare(y) == 0; }
+ };
+
+ private:
+ std::optional<TLegacyValue> LegacyValue;
+ std::vector<std::pair<TKeys::E, TString>> Items;
+
+ public:
+ // generic ctors
+ TNodeLocation() = default;
+ TNodeLocation(const TNodeLocation&) = default;
+ TNodeLocation(TNodeLocation&&) = default;
+
+ // protobuf-parser ctor
+ explicit TNodeLocation(const NActorsInterconnect::TNodeLocation& location);
+
+ // serialized protobuf ctor
+ static constexpr struct TFromSerialized {} FromSerialized {};
+ TNodeLocation(TFromSerialized, const TString& s);
+
+ // parser helper function
+ static NActorsInterconnect::TNodeLocation ParseLocation(const TString& s);
+
+ // assignment operators
+ TNodeLocation& operator =(const TNodeLocation&) = default;
+ TNodeLocation& operator =(TNodeLocation&&) = default;
+
+ void Serialize(NActorsInterconnect::TNodeLocation *pb) const;
+ TString GetSerializedLocation() const;
+
+ TString GetDataCenterId() const { return ToStringUpTo(TKeys::DataCenter); }
+ TString GetModuleId() const { return ToStringUpTo(TKeys::Module); }
+ TString GetRackId() const { return ToStringUpTo(TKeys::Rack); }
+ TString ToString() const { return ToStringUpTo(TKeys::E(Max<int>())); }
+ TString ToStringUpTo(TKeys::E upToKey) const;
+
+ TLegacyValue GetLegacyValue() const;
+
+ const std::vector<std::pair<TKeys::E, TString>>& GetItems() const { return Items; }
+
+ bool HasKey(TKeys::E key) const {
+ auto comp = [](const auto& p, TKeys::E value) { return p.first < value; };
+ const auto it = std::lower_bound(Items.begin(), Items.end(), key, comp);
+ return it != Items.end() && it->first == key;
+ }
+
+ int Compare(const TNodeLocation& other) const {
+ if (LegacyValue || other.LegacyValue) {
+ return GetLegacyValue().Compare(other.GetLegacyValue());
+ } else if (Items < other.Items) {
+ return -1;
+ } else if (other.Items < Items) {
+ return 1;
+ } else {
+ return 0;
+ }
+ }
+
+ void InheritLegacyValue(const TNodeLocation& other) {
+ LegacyValue = other.GetLegacyValue();
+ }
+
+ friend bool operator ==(const TNodeLocation& x, const TNodeLocation& y) { return x.Compare(y) == 0; }
+ friend bool operator !=(const TNodeLocation& x, const TNodeLocation& y) { return x.Compare(y) != 0; }
+ friend bool operator < (const TNodeLocation& x, const TNodeLocation& y) { return x.Compare(y) < 0; }
+ friend bool operator <=(const TNodeLocation& x, const TNodeLocation& y) { return x.Compare(y) <= 0; }
+ friend bool operator > (const TNodeLocation& x, const TNodeLocation& y) { return x.Compare(y) > 0; }
+ friend bool operator >=(const TNodeLocation& x, const TNodeLocation& y) { return x.Compare(y) >= 0; }
+ };
+
+ struct TEvInterconnect {
+ enum EEv {
+ EvForward = EventSpaceBegin(TEvents::ES_INTERCONNECT),
+ EvResolveNode, // resolve info about a node (internal)
+ EvNodeAddress, // node info (internal)
+ EvConnectNode, // ask the proxy to establish a connection (i.e. we are going to send something there soon)
+ EvAcceptIncoming,
+ EvNodeConnected, // node connected notify
+ EvNodeDisconnected, // node disconnected notify
+ EvRegisterNode,
+ EvRegisterNodeResult,
+ EvListNodes,
+ EvNodesInfo,
+ EvDisconnect,
+ EvGetNode,
+ EvNodeInfo,
+ EvClosePeerSocket,
+ EvCloseInputSession,
+ EvPoisonSession,
+ EvTerminate,
+ EvEnd
+ };
+
+ enum ESubscribes {
+ SubConnected,
+ SubDisconnected,
+ };
+
+ static_assert(EvEnd < EventSpaceEnd(TEvents::ES_INTERCONNECT), "expect EvEnd < EventSpaceEnd(TEvents::ES_INTERCONNECT)");
+
+ struct TEvResolveNode;
+ struct TEvNodeAddress;
+
+ struct TEvConnectNode: public TEventBase<TEvConnectNode, EvConnectNode> {
+ DEFINE_SIMPLE_LOCAL_EVENT(TEvConnectNode, "TEvInterconnect::TEvConnectNode")
+ };
+
+ struct TEvAcceptIncoming;
+
+ struct TEvNodeConnected: public TEventLocal<TEvNodeConnected, EvNodeConnected> {
+ DEFINE_SIMPLE_LOCAL_EVENT(TEvNodeConnected, "TEvInterconnect::TEvNodeConnected")
+ TEvNodeConnected(ui32 node) noexcept
+ : NodeId(node)
+ {
+ }
+ const ui32 NodeId;
+ };
+
+ struct TEvNodeDisconnected: public TEventLocal<TEvNodeDisconnected, EvNodeDisconnected> {
+ DEFINE_SIMPLE_LOCAL_EVENT(TEvNodeDisconnected, "TEvInterconnect::TEvNodeDisconnected")
+ TEvNodeDisconnected(ui32 node) noexcept
+ : NodeId(node)
+ {
+ }
+ const ui32 NodeId;
+ };
+
+ struct TEvRegisterNode;
+ struct TEvRegisterNodeResult;
+
+ struct TEvListNodes: public TEventLocal<TEvListNodes, EvListNodes> {
+ };
+
+ struct TNodeInfo {
+ ui32 NodeId;
+ TString Address;
+ TString Host;
+ TString ResolveHost;
+ ui16 Port;
+ TNodeLocation Location;
+
+ TNodeInfo() = default;
+ TNodeInfo(const TNodeInfo&) = default;
+ TNodeInfo& operator =(const TNodeInfo&) = default;
+ TNodeInfo(ui32 nodeId,
+ const TString& address,
+ const TString& host,
+ const TString& resolveHost,
+ ui16 port,
+ const TNodeLocation& location)
+ : NodeId(nodeId)
+ , Address(address)
+ , Host(host)
+ , ResolveHost(resolveHost)
+ , Port(port)
+ , Location(location)
+ {
+ }
+
+ operator ui32() const {
+ return NodeId;
+ }
+ };
+
+ struct TEvNodesInfo: public TEventLocal<TEvNodesInfo, EvNodesInfo> {
+ TVector<TNodeInfo> Nodes;
+
+ const TNodeInfo* GetNodeInfo(ui32 nodeId) const {
+ for (const auto& x : Nodes) {
+ if (x.NodeId == nodeId)
+ return &x;
+ }
+ return nullptr;
+ }
+ };
+
+ struct TEvDisconnect;
+
+ struct TEvGetNode: public TEventLocal<TEvGetNode, EvGetNode> {
+ ui32 NodeId;
+ TInstant Deadline;
+
+ TEvGetNode(ui32 nodeId, TInstant deadline = TInstant::Max())
+ : NodeId(nodeId)
+ , Deadline(deadline)
+ {
+ }
+ };
+
+ struct TEvNodeInfo: public TEventLocal<TEvNodeInfo, EvNodeInfo> {
+ TEvNodeInfo(ui32 nodeId)
+ : NodeId(nodeId)
+ {
+ }
+
+ ui32 NodeId;
+ THolder<TNodeInfo> Node;
+ };
+
+ struct TEvClosePeerSocket : TEventLocal<TEvClosePeerSocket, EvClosePeerSocket> {};
+
+ struct TEvCloseInputSession : TEventLocal<TEvCloseInputSession, EvCloseInputSession> {};
+
+ struct TEvPoisonSession : TEventLocal<TEvPoisonSession, EvPoisonSession> {};
+
+ struct TEvTerminate : TEventLocal<TEvTerminate, EvTerminate> {};
+ };
+}
diff --git a/library/cpp/actors/core/invoke.h b/library/cpp/actors/core/invoke.h
new file mode 100644
index 0000000000..931a9767dd
--- /dev/null
+++ b/library/cpp/actors/core/invoke.h
@@ -0,0 +1,110 @@
+#pragma once
+
+#include "actor_bootstrapped.h"
+#include "events.h"
+#include "event_local.h"
+
+#include <any>
+#include <type_traits>
+#include <utility>
+#include <variant>
+
+#include <util/system/type_name.h>
+
+namespace NActors {
+
+ struct TEvents::TEvInvokeResult
+ : TEventLocal<TEvInvokeResult, TSystem::InvokeResult>
+ {
+ using TProcessCallback = std::function<void(TEvInvokeResult&, const TActorContext&)>;
+ TProcessCallback ProcessCallback;
+ std::variant<std::any /* the value */, std::exception_ptr> Result;
+
+ // This constructor creates a TEvInvokeResult holding the result of calling callback(args...), or an
+ // exception_ptr if an exception occurs during evaluation.
+ template<typename TCallback, typename... TArgs>
+ TEvInvokeResult(TProcessCallback&& process, TCallback&& callback, TArgs&&... args)
+ : ProcessCallback(std::move(process))
+ {
+ try {
+ if constexpr (std::is_void_v<std::invoke_result_t<TCallback, TArgs...>>) {
+ // just invoke callback without saving any value
+ std::invoke(std::forward<TCallback>(callback), std::forward<TArgs>(args)...);
+ } else {
+ Result.emplace<std::any>(std::invoke(std::forward<TCallback>(callback), std::forward<TArgs>(args)...));
+ }
+ } catch (...) {
+ Result.emplace<std::exception_ptr>(std::current_exception());
+ }
+ }
+
+ void Process(const TActorContext& ctx) {
+ ProcessCallback(*this, ctx);
+ }
+
+ template<typename TCallback>
+ std::invoke_result_t<TCallback, const TActorContext&> GetResult() {
+ using T = std::invoke_result_t<TCallback, const TActorContext&>;
+ return std::visit([](auto& arg) -> T {
+ using TArg = std::decay_t<decltype(arg)>;
+ if constexpr (std::is_same_v<TArg, std::exception_ptr>) {
+ std::rethrow_exception(arg);
+ } else if constexpr (std::is_void_v<T>) {
+ Y_VERIFY(!arg.has_value());
+ } else if (auto *value = std::any_cast<T>(&arg)) {
+ return std::move(*value);
+ } else {
+ Y_FAIL("unsupported return type for TEvInvokeResult: actual# %s != expected# %s",
+ TypeName(arg.type()).data(), TypeName<T>().data());
+ }
+ }, Result);
+ }
+ };
+
+ // The invoke actor is used to make procedure calls in specific thread pools.
+ //
+ // The actor is created by CreateInvokeActor(callback, complete), where `callback` is the function invoked upon
+ // actor registration; it then issues a TEvInvokeResult to the parent actor with the result of the called
+ // function. If the called function throws an exception, the exception arrives in the result. The receiver of
+ // this message can either handle it by its own means by calling ev.GetResult() (which rethrows the exception if
+ // one occurred in the called function, or returns its return value; note that even when there is no return
+ // value, GetResult() should still be called to avoid losing an exception), or invoke ev.Process(), which calls
+ // the callback provided as the `complete` parameter to CreateInvokeActor. The complete handler is invoked with
+ // the result-getter lambda as the first argument and the actor system context as the second. The result-getter
+ // should be called to obtain the resulting value or exception, just like the GetResult() method of the
+ // TEvInvokeResult event.
+ //
+ // Note that `callback` usually executes in a separate actor on a separate mailbox and must not touch the parent
+ // actor's state, whereas the `complete` handler is invoked in the parent's context and may use its contents. Do
+ // not forget to handle the TEvInvokeResult event by calling Process() or GetResult(), whichever is appropriate.
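+ //
+ // A minimal usage sketch (SOME_ACTIVITY and the lambdas are hypothetical placeholders, not part of this header):
+ //
+ //   auto invoker = CreateInvokeActor<SOME_ACTIVITY>(
+ //       [](const TActorContext&) { return 42; },          // runs inside the invoke actor
+ //       [](auto getResult, const TActorContext&) {
+ //           int value = getResult();                      // rethrows here if the callback threw
+ //           Y_UNUSED(value);
+ //       });
+ //   ctx.Register(invoker.release());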
+
+ template<typename TCallback, typename TCompletion, ui32 Activity>
+ class TInvokeActor : public TActorBootstrapped<TInvokeActor<TCallback, TCompletion, Activity>> {
+ TCallback Callback;
+ TCompletion Complete;
+
+ public:
+ static constexpr auto ActorActivityType() {
+ return static_cast<IActor::EActorActivity>(Activity);
+ }
+
+ TInvokeActor(TCallback&& callback, TCompletion&& complete)
+ : Callback(std::move(callback))
+ , Complete(std::move(complete))
+ {}
+
+ void Bootstrap(const TActorId& parentId, const TActorContext& ctx) {
+ auto process = [complete = std::move(Complete)](TEvents::TEvInvokeResult& res, const TActorContext& ctx) {
+ complete([&] { return res.GetResult<TCallback>(); }, ctx);
+ };
+ ctx.Send(parentId, new TEvents::TEvInvokeResult(std::move(process), std::move(Callback), ctx));
+ TActorBootstrapped<TInvokeActor>::Die(ctx);
+ }
+ };
+
+ template<ui32 Activity, typename TCallback, typename TCompletion>
+ std::unique_ptr<IActor> CreateInvokeActor(TCallback&& callback, TCompletion&& complete) {
+ return std::make_unique<TInvokeActor<std::decay_t<TCallback>, std::decay_t<TCompletion>, Activity>>(
+ std::forward<TCallback>(callback), std::forward<TCompletion>(complete));
+ }
+
+} // NActors
diff --git a/library/cpp/actors/core/io_dispatcher.cpp b/library/cpp/actors/core/io_dispatcher.cpp
new file mode 100644
index 0000000000..90699ff16c
--- /dev/null
+++ b/library/cpp/actors/core/io_dispatcher.cpp
@@ -0,0 +1,234 @@
+#include "io_dispatcher.h"
+#include "actor_bootstrapped.h"
+#include "hfunc.h"
+#include <util/system/mutex.h>
+#include <util/system/condvar.h>
+#include <util/system/thread.h>
+#include <map>
+#include <list>
+
+namespace NActors {
+
+ class TIoDispatcherActor : public TActorBootstrapped<TIoDispatcherActor> {
+ enum {
+ EvNotifyThreadStopped = EventSpaceBegin(TEvents::ES_PRIVATE),
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // IO task queue
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ class TTask {
+ TInstant Timestamp;
+ std::function<void()> Callback;
+
+ public:
+ TTask(TInstant timestamp, TEvInvokeQuery *ev)
+ : Timestamp(timestamp)
+ , Callback(std::move(ev->Callback))
+ {}
+
+ void Execute() {
+ Callback();
+ }
+
+ TInstant GetTimestamp() const {
+ return Timestamp;
+ }
+ };
+
+ class TTaskQueue {
+ std::list<TTask> Tasks;
+ TMutex Mutex;
+ TCondVar CondVar;
+ size_t NumThreadsToStop = 0;
+
+ public:
+ void Enqueue(TInstant timestamp, TEvInvokeQuery *ev) {
+ std::list<TTask> list;
+ list.emplace_back(timestamp, ev);
+ with_lock (Mutex) {
+ Tasks.splice(Tasks.end(), std::move(list));
+ }
+ CondVar.Signal();
+ }
+
+ bool Dequeue(std::list<TTask>& list, bool *sendNotify) {
+ with_lock (Mutex) {
+ CondVar.Wait(Mutex, [&] { return NumThreadsToStop || !Tasks.empty(); });
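+ // NumThreadsToStop == Max<size_t>() is the sentinel set by Stop(): all threads exit without notifying;
+ // finite values come from StopOne(), and each such thread sends EvNotifyThreadStopped so the actor can
+ // Join() it.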
+ if (NumThreadsToStop) {
+ *sendNotify = NumThreadsToStop != Max<size_t>();
+ if (*sendNotify) {
+ --NumThreadsToStop;
+ }
+ return false;
+ } else {
+ list.splice(list.end(), Tasks, Tasks.begin());
+ return true;
+ }
+ }
+ }
+
+ void Stop() {
+ with_lock (Mutex) {
+ NumThreadsToStop = Max<size_t>();
+ }
+ CondVar.BroadCast();
+ }
+
+ void StopOne() {
+ with_lock (Mutex) {
+ ++NumThreadsToStop;
+ Y_VERIFY(NumThreadsToStop);
+ }
+ CondVar.Signal();
+ }
+
+ std::optional<TInstant> GetEarliestTaskTimestamp() {
+ with_lock (Mutex) {
+ return Tasks.empty() ? std::nullopt : std::make_optional(Tasks.front().GetTimestamp());
+ }
+ }
+ };
+
+ TTaskQueue TaskQueue;
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // IO dispatcher threads
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ class TThread : public ISimpleThread {
+ TIoDispatcherActor& Actor;
+ TActorSystem* const ActorSystem;
+
+ public:
+ TThread(TIoDispatcherActor& actor, TActorSystem *actorSystem)
+ : Actor(actor)
+ , ActorSystem(actorSystem)
+ {
+ Start();
+ }
+
+ void *ThreadProc() override {
+ SetCurrentThreadName("kikimr IO");
+ for (;;) {
+ std::list<TTask> tasks;
+ bool sendNotify;
+ if (!Actor.TaskQueue.Dequeue(tasks, &sendNotify)) {
+ if (sendNotify) {
+ ActorSystem->Send(new IEventHandle(EvNotifyThreadStopped, 0, Actor.SelfId(), TActorId(),
+ nullptr, TThread::CurrentThreadId()));
+ }
+ break;
+ }
+ for (TTask& task : tasks) {
+ task.Execute();
+ ++*Actor.TasksCompleted;
+ }
+ }
+ return nullptr;
+ }
+ };
+
+ static constexpr size_t MinThreadCount = 4;
+ static constexpr size_t MaxThreadCount = 64;
+ std::map<TThread::TId, std::unique_ptr<TThread>> Threads;
+ size_t NumRunningThreads = 0;
+
+ void StartThread() {
+ auto thread = std::make_unique<TThread>(*this, TlsActivationContext->ExecutorThread.ActorSystem);
+ const TThread::TId id = thread->Id();
+ Threads.emplace(id, std::move(thread));
+ *NumThreads = ++NumRunningThreads;
+ ++*ThreadsStarted;
+ }
+
+ void StopThread() {
+ Y_VERIFY(Threads.size());
+ TaskQueue.StopOne();
+ *NumThreads = --NumRunningThreads;
+ ++*ThreadsStopped;
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // Counters
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ NMonitoring::TDynamicCounters::TCounterPtr NumThreads;
+ NMonitoring::TDynamicCounters::TCounterPtr TasksAdded;
+ NMonitoring::TDynamicCounters::TCounterPtr TasksCompleted;
+ NMonitoring::TDynamicCounters::TCounterPtr ThreadsStarted;
+ NMonitoring::TDynamicCounters::TCounterPtr ThreadsStopped;
+
+ public:
+ TIoDispatcherActor(const NMonitoring::TDynamicCounterPtr& counters)
+ : NumThreads(counters->GetCounter("NumThreads"))
+ , TasksAdded(counters->GetCounter("TasksAdded", true))
+ , TasksCompleted(counters->GetCounter("TasksCompleted", true))
+ , ThreadsStarted(counters->GetCounter("ThreadsStarted", true))
+ , ThreadsStopped(counters->GetCounter("ThreadsStopped", true))
+ {}
+
+ ~TIoDispatcherActor() override {
+ TaskQueue.Stop();
+ }
+
+ void Bootstrap() {
+ while (NumRunningThreads < MinThreadCount) {
+ StartThread();
+ }
+ HandleWakeup();
+ Become(&TThis::StateFunc);
+ }
+
+ void HandleThreadStopped(TAutoPtr<IEventHandle> ev) {
+ auto it = Threads.find(ev->Cookie);
+ Y_VERIFY(it != Threads.end());
+ it->second->Join();
+ Threads.erase(it);
+ }
+
+ void Handle(TEvInvokeQuery::TPtr ev) {
+ ++*TasksAdded;
+ TaskQueue.Enqueue(TActivationContext::Now(), ev->Get());
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // Thread usage counter logic
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ std::optional<TInstant> IdleTimestamp;
+ static constexpr TDuration ThreadStartTime = TDuration::MilliSeconds(500);
+ static constexpr TDuration ThreadStopTime = TDuration::MilliSeconds(500);
+
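+ // Scaling policy: Bootstrap() schedules a wakeup every 100 ms; if the oldest queued task has waited longer
+ // than ThreadStartTime, one more thread is started (up to MaxThreadCount); once the queue has stayed empty
+ // for ThreadStopTime, one thread is stopped (down to MinThreadCount).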
+ void HandleWakeup() {
+ const TInstant now = TActivationContext::Now();
+ std::optional<TInstant> earliest = TaskQueue.GetEarliestTaskTimestamp();
+ if (earliest) {
+ if (now >= *earliest + ThreadStartTime && NumRunningThreads < MaxThreadCount) {
+ StartThread();
+ }
+ IdleTimestamp.reset();
+ } else if (!IdleTimestamp) {
+ IdleTimestamp = now;
+ } else if (now >= *IdleTimestamp + ThreadStopTime) {
+ IdleTimestamp.reset();
+ if (NumRunningThreads > MinThreadCount) {
+ StopThread();
+ }
+ }
+ Schedule(TDuration::MilliSeconds(100), new TEvents::TEvWakeup);
+ }
+
+ STRICT_STFUNC(StateFunc, {
+ fFunc(EvNotifyThreadStopped, HandleThreadStopped);
+ hFunc(TEvInvokeQuery, Handle);
+ cFunc(TEvents::TSystem::Wakeup, HandleWakeup);
+ cFunc(TEvents::TSystem::Poison, PassAway);
+ })
+ };
+
+ IActor *CreateIoDispatcherActor(const NMonitoring::TDynamicCounterPtr& counters) {
+ return new TIoDispatcherActor(counters);
+ }
+
+} // NActors
diff --git a/library/cpp/actors/core/io_dispatcher.h b/library/cpp/actors/core/io_dispatcher.h
new file mode 100644
index 0000000000..b0e4e60d1a
--- /dev/null
+++ b/library/cpp/actors/core/io_dispatcher.h
@@ -0,0 +1,38 @@
+#pragma once
+
+#include "actor.h"
+#include "event_local.h"
+#include "events.h"
+#include "actorsystem.h"
+#include "executor_thread.h"
+#include "executelater.h"
+
+namespace NActors {
+
+ struct TEvInvokeQuery : TEventLocal<TEvInvokeQuery, TEvents::TSystem::InvokeQuery> {
+ std::function<void()> Callback;
+
+ TEvInvokeQuery(std::function<void()>&& callback)
+ : Callback(std::move(callback))
+ {}
+ };
+
+ inline TActorId MakeIoDispatcherActorId() {
+ return TActorId(0, TStringBuf("IoDispatcher", 12));
+ }
+
+ extern IActor *CreateIoDispatcherActor(const NMonitoring::TDynamicCounterPtr& counters);
+
+ /* InvokeIoCallback enqueues callback() for execution in the IO thread pool and then returns the result in a
+ * TEvInvokeResult message to the parentId actor. If the dispatcher actor is unavailable, the callback is instead
+ * executed in an ExecuteLater actor registered in the given pool.
+ */
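+
+ /* A minimal usage sketch (ioPoolId, someActivityType and DoBlockingRead are hypothetical placeholders):
+ *
+ *   InvokeIoCallback([] { DoBlockingRead(); }, ioPoolId, someActivityType);
+ */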
+ template<typename TCallback>
+ static void InvokeIoCallback(TCallback&& callback, ui32 poolId, IActor::EActivityType activityType) {
+ if (!TActivationContext::Send(new IEventHandle(MakeIoDispatcherActorId(), TActorId(),
+ new TEvInvokeQuery(callback)))) {
+ TActivationContext::Register(CreateExecuteLaterActor(std::move(callback), activityType), TActorId(),
+ TMailboxType::HTSwap, poolId);
+ }
+ }
+
+} // NActors
diff --git a/library/cpp/actors/core/lease.h b/library/cpp/actors/core/lease.h
new file mode 100644
index 0000000000..650ae7b122
--- /dev/null
+++ b/library/cpp/actors/core/lease.h
@@ -0,0 +1,56 @@
+#pragma once
+
+#include "defs.h"
+
+namespace NActors {
+ // Value representing a specific worker's permission for exclusive use of a CPU until a specific deadline
+ struct TLease {
+ // The low WorkerBits bits store the current fast worker id;
+ // all higher bits store the expiration (hard preemption) timestamp
+ using TValue = ui64;
+ TValue Value;
+
+ static constexpr ui64 WorkerIdMask = ui64((1ull << WorkerBits) - 1);
+ static constexpr ui64 ExpireTsMask = ~WorkerIdMask;
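+
+ // For illustration (assuming WorkerBits == 8; the real constant presumably comes from defs.h and may differ):
+ // TLease(workerId = 3, expireTs = 0x12345600) stores Value = 0x12345603 -- the low byte carries the worker id
+ // and the remaining bits carry the (truncated) expiration timestamp.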
+
+ explicit constexpr TLease(ui64 value)
+ : Value(value)
+ {}
+
+ constexpr TLease(TWorkerId workerId, ui64 expireTs)
+ : Value((workerId & WorkerIdMask) | (expireTs & ExpireTsMask))
+ {}
+
+ TWorkerId GetWorkerId() const {
+ return Value & WorkerIdMask;
+ }
+
+ TLease NeverExpire() const {
+ return TLease(Value | ExpireTsMask);
+ }
+
+ bool IsNeverExpiring() const {
+ return (Value & ExpireTsMask) == ExpireTsMask;
+ }
+
+ ui64 GetExpireTs() const {
+ // Do not truncate the worker id.
+ // NOTE: this decreases accuracy but improves performance
+ return Value;
+ }
+
+ ui64 GetPreciseExpireTs() const {
+ return Value & ExpireTsMask;
+ }
+
+ operator TValue() const {
+ return Value;
+ }
+ };
+
+ // Special expire timestamp values
+ static constexpr ui64 NeverExpire = ui64(-1);
+
+ // Special hard-preemption-in-progress lease
+ static constexpr TLease HardPreemptionLease = TLease(TLease::WorkerIdMask, NeverExpire);
+}
diff --git a/library/cpp/actors/core/log.cpp b/library/cpp/actors/core/log.cpp
new file mode 100644
index 0000000000..5f63b5af58
--- /dev/null
+++ b/library/cpp/actors/core/log.cpp
@@ -0,0 +1,753 @@
+#include "log.h"
+#include "log_settings.h"
+
+#include <library/cpp/monlib/service/pages/templates.h>
+
+static_assert(int(NActors::NLog::PRI_EMERG) == int(::TLOG_EMERG), "expect int(NActors::NLog::PRI_EMERG) == int(::TLOG_EMERG)");
+static_assert(int(NActors::NLog::PRI_ALERT) == int(::TLOG_ALERT), "expect int(NActors::NLog::PRI_ALERT) == int(::TLOG_ALERT)");
+static_assert(int(NActors::NLog::PRI_CRIT) == int(::TLOG_CRIT), "expect int(NActors::NLog::PRI_CRIT) == int(::TLOG_CRIT)");
+static_assert(int(NActors::NLog::PRI_ERROR) == int(::TLOG_ERR), "expect int(NActors::NLog::PRI_ERROR) == int(::TLOG_ERR)");
+static_assert(int(NActors::NLog::PRI_WARN) == int(::TLOG_WARNING), "expect int(NActors::NLog::PRI_WARN) == int(::TLOG_WARNING)");
+static_assert(int(NActors::NLog::PRI_NOTICE) == int(::TLOG_NOTICE), "expect int(NActors::NLog::PRI_NOTICE) == int(::TLOG_NOTICE)");
+static_assert(int(NActors::NLog::PRI_INFO) == int(::TLOG_INFO), "expect int(NActors::NLog::PRI_INFO) == int(::TLOG_INFO)");
+static_assert(int(NActors::NLog::PRI_DEBUG) == int(::TLOG_DEBUG), "expect int(NActors::NLog::PRI_DEBUG) == int(::TLOG_DEBUG)");
+static_assert(int(NActors::NLog::PRI_TRACE) == int(::TLOG_RESOURCES), "expect int(NActors::NLog::PRI_TRACE) == int(::TLOG_RESOURCES)");
+
+namespace {
+ struct TRecordWithNewline {
+ ELogPriority Priority;
+ TTempBuf Buf;
+
+ TRecordWithNewline(const TLogRecord& rec)
+ : Priority(rec.Priority)
+ , Buf(rec.Len + 1)
+ {
+ Buf.Append(rec.Data, rec.Len);
+ *Buf.Proceed(1) = '\n';
+ }
+
+ operator TLogRecord() const {
+ return TLogRecord(Priority, Buf.Data(), Buf.Filled());
+ }
+ };
+}
+
+namespace NActors {
+
+ class TLoggerCounters : public ILoggerMetrics {
+ public:
+ TLoggerCounters(TIntrusivePtr<NMonitoring::TDynamicCounters> counters)
+ : DynamicCounters(counters)
+ {
+ ActorMsgs_ = DynamicCounters->GetCounter("ActorMsgs", true);
+ DirectMsgs_ = DynamicCounters->GetCounter("DirectMsgs", true);
+ LevelRequests_ = DynamicCounters->GetCounter("LevelRequests", true);
+ IgnoredMsgs_ = DynamicCounters->GetCounter("IgnoredMsgs", true);
+ DroppedMsgs_ = DynamicCounters->GetCounter("DroppedMsgs", true);
+
+ AlertMsgs_ = DynamicCounters->GetCounter("AlertMsgs", true);
+ EmergMsgs_ = DynamicCounters->GetCounter("EmergMsgs", true);
+ }
+
+ ~TLoggerCounters() = default;
+
+ void IncActorMsgs() override {
+ ++*ActorMsgs_;
+ }
+ void IncDirectMsgs() override {
+ ++*DirectMsgs_;
+ }
+ void IncLevelRequests() override {
+ ++*LevelRequests_;
+ }
+ void IncIgnoredMsgs() override {
+ ++*IgnoredMsgs_;
+ }
+ void IncAlertMsgs() override {
+ ++*AlertMsgs_;
+ }
+ void IncEmergMsgs() override {
+ ++*EmergMsgs_;
+ }
+ void IncDroppedMsgs() override {
+ DroppedMsgs_->Inc();
+ };
+
+ void GetOutputHtml(IOutputStream& str) override {
+ HTML(str) {
+ DIV_CLASS("row") {
+ DIV_CLASS("col-md-12") {
+ H4() {
+ str << "Counters" << Endl;
+ }
+ DynamicCounters->OutputHtml(str);
+ }
+ }
+ }
+ }
+
+ private:
+ NMonitoring::TDynamicCounters::TCounterPtr ActorMsgs_;
+ NMonitoring::TDynamicCounters::TCounterPtr DirectMsgs_;
+ NMonitoring::TDynamicCounters::TCounterPtr LevelRequests_;
+ NMonitoring::TDynamicCounters::TCounterPtr IgnoredMsgs_;
+ NMonitoring::TDynamicCounters::TCounterPtr AlertMsgs_;
+ NMonitoring::TDynamicCounters::TCounterPtr EmergMsgs_;
+ // Dropped while the logger backend was unavailable
+ NMonitoring::TDynamicCounters::TCounterPtr DroppedMsgs_;
+
+ TIntrusivePtr<NMonitoring::TDynamicCounters> DynamicCounters;
+ };
+
+ class TLoggerMetrics : public ILoggerMetrics {
+ public:
+ TLoggerMetrics(std::shared_ptr<NMonitoring::TMetricRegistry> metrics)
+ : Metrics(metrics)
+ {
+ ActorMsgs_ = Metrics->Rate(NMonitoring::TLabels{{"sensor", "logger.actor_msgs"}});
+ DirectMsgs_ = Metrics->Rate(NMonitoring::TLabels{{"sensor", "logger.direct_msgs"}});
+ LevelRequests_ = Metrics->Rate(NMonitoring::TLabels{{"sensor", "logger.level_requests"}});
+ IgnoredMsgs_ = Metrics->Rate(NMonitoring::TLabels{{"sensor", "logger.ignored_msgs"}});
+ DroppedMsgs_ = Metrics->Rate(NMonitoring::TLabels{{"sensor", "logger.dropped_msgs"}});
+
+ AlertMsgs_ = Metrics->Rate(NMonitoring::TLabels{{"sensor", "logger.alert_msgs"}});
+ EmergMsgs_ = Metrics->Rate(NMonitoring::TLabels{{"sensor", "logger.emerg_msgs"}});
+ }
+
+ ~TLoggerMetrics() = default;
+
+ void IncActorMsgs() override {
+ ActorMsgs_->Inc();
+ }
+ void IncDirectMsgs() override {
+ DirectMsgs_->Inc();
+ }
+ void IncLevelRequests() override {
+ LevelRequests_->Inc();
+ }
+ void IncIgnoredMsgs() override {
+ IgnoredMsgs_->Inc();
+ }
+ void IncAlertMsgs() override {
+ AlertMsgs_->Inc();
+ }
+ void IncEmergMsgs() override {
+ EmergMsgs_->Inc();
+ }
+ void IncDroppedMsgs() override {
+ DroppedMsgs_->Inc();
+ };
+
+ void GetOutputHtml(IOutputStream& str) override {
+ HTML(str) {
+ DIV_CLASS("row") {
+ DIV_CLASS("col-md-12") {
+ H4() {
+ str << "Metrics" << Endl;
+ }
+ // TODO: TMetricRegistry does not currently provide a GetOutputHtml function
+ }
+ }
+ }
+ }
+
+ private:
+ NMonitoring::TRate* ActorMsgs_;
+ NMonitoring::TRate* DirectMsgs_;
+ NMonitoring::TRate* LevelRequests_;
+ NMonitoring::TRate* IgnoredMsgs_;
+ NMonitoring::TRate* AlertMsgs_;
+ NMonitoring::TRate* EmergMsgs_;
+ // Dropped while the logger backend was unavailable
+ NMonitoring::TRate* DroppedMsgs_;
+
+ std::shared_ptr<NMonitoring::TMetricRegistry> Metrics;
+ };
+
+ TAtomic TLoggerActor::IsOverflow = 0;
+
+ TLoggerActor::TLoggerActor(TIntrusivePtr<NLog::TSettings> settings,
+ TAutoPtr<TLogBackend> logBackend,
+ TIntrusivePtr<NMonitoring::TDynamicCounters> counters)
+ : TActor(&TLoggerActor::StateFunc)
+ , Settings(settings)
+ , LogBackend(logBackend.Release())
+ , Metrics(std::make_unique<TLoggerCounters>(counters))
+ {
+ }
+
+ TLoggerActor::TLoggerActor(TIntrusivePtr<NLog::TSettings> settings,
+ std::shared_ptr<TLogBackend> logBackend,
+ TIntrusivePtr<NMonitoring::TDynamicCounters> counters)
+ : TActor(&TLoggerActor::StateFunc)
+ , Settings(settings)
+ , LogBackend(logBackend)
+ , Metrics(std::make_unique<TLoggerCounters>(counters))
+ {
+ }
+
+ TLoggerActor::TLoggerActor(TIntrusivePtr<NLog::TSettings> settings,
+ TAutoPtr<TLogBackend> logBackend,
+ std::shared_ptr<NMonitoring::TMetricRegistry> metrics)
+ : TActor(&TLoggerActor::StateFunc)
+ , Settings(settings)
+ , LogBackend(logBackend.Release())
+ , Metrics(std::make_unique<TLoggerMetrics>(metrics))
+ {
+ }
+
+ TLoggerActor::TLoggerActor(TIntrusivePtr<NLog::TSettings> settings,
+ std::shared_ptr<TLogBackend> logBackend,
+ std::shared_ptr<NMonitoring::TMetricRegistry> metrics)
+ : TActor(&TLoggerActor::StateFunc)
+ , Settings(settings)
+ , LogBackend(logBackend)
+ , Metrics(std::make_unique<TLoggerMetrics>(metrics))
+ {
+ }
+
+ TLoggerActor::~TLoggerActor() {
+ }
+
+ void TLoggerActor::Log(TInstant time, NLog::EPriority priority, NLog::EComponent component, const char* c, ...) {
+ Metrics->IncDirectMsgs();
+ if (Settings && Settings->Satisfies(priority, component, 0ull)) {
+ va_list params;
+ va_start(params, c);
+ TString formatted;
+ vsprintf(formatted, c, params);
+
+ auto ok = OutputRecord(time, NLog::EPrio(priority), component, formatted);
+ Y_UNUSED(ok);
+ va_end(params);
+ }
+ }
+
+ void TLoggerActor::Throttle(const NLog::TSettings& settings) {
+ if (AtomicGet(IsOverflow))
+ Sleep(settings.ThrottleDelay);
+ }
+
+ void TLoggerActor::LogIgnoredCount(TInstant now) {
+ TString message = Sprintf("Ignored IgnoredCount# %" PRIu64 " log records due to logger overflow!", IgnoredCount);
+ if (!OutputRecord(now, NActors::NLog::EPrio::Error, Settings->LoggerComponent, message)) {
+ BecomeDefunct();
+ }
+ }
+
+ void TLoggerActor::HandleIgnoredEvent(TLogIgnored::TPtr& ev, const NActors::TActorContext& ctx) {
+ Y_UNUSED(ev);
+ LogIgnoredCount(ctx.Now());
+ IgnoredCount = 0;
+ PassedCount = 0;
+ }
+
+ void TLoggerActor::HandleIgnoredEventDrop() {
+ // logger backend is unavailable, just ignore
+ }
+
+ void TLoggerActor::WriteMessageStat(const NLog::TEvLog& ev) {
+ Metrics->IncActorMsgs();
+
+ const auto prio = ev.Level.ToPrio();
+
+ switch (prio) {
+ case ::NActors::NLog::EPrio::Alert:
+ Metrics->IncAlertMsgs();
+ break;
+ case ::NActors::NLog::EPrio::Emerg:
+ Metrics->IncEmergMsgs();
+ break;
+ default:
+ break;
+ }
+
+ }
+
+ void TLoggerActor::HandleLogEvent(NLog::TEvLog::TPtr& ev, const NActors::TActorContext& ctx) {
+ i64 delayMillisec = (ctx.Now() - ev->Get()->Stamp).MilliSeconds();
+ WriteMessageStat(*ev->Get());
+ if (Settings->AllowDrop) {
+ // Disable throttling if it was enabled previously
+ if (AtomicGet(IsOverflow))
+ AtomicSet(IsOverflow, 0);
+
+ // Check if some records have to be dropped
+ if ((PassedCount > 10 && delayMillisec > (i64)Settings->TimeThresholdMs) || IgnoredCount > 0) {
+ Metrics->IncIgnoredMsgs();
+ if (IgnoredCount == 0) {
+ ctx.Send(ctx.SelfID, new TLogIgnored());
+ }
+ ++IgnoredCount;
+ PassedCount = 0;
+ return;
+ }
+ PassedCount++;
+ } else {
+ // Enable or disable throttling depending on the load
+ if (delayMillisec > (i64)Settings->TimeThresholdMs && !AtomicGet(IsOverflow))
+ AtomicSet(IsOverflow, 1);
+ else if (delayMillisec <= (i64)Settings->TimeThresholdMs && AtomicGet(IsOverflow))
+ AtomicSet(IsOverflow, 0);
+ }
+
+ const auto prio = ev->Get()->Level.ToPrio();
+ if (!OutputRecord(ev->Get()->Stamp, prio, ev->Get()->Component, ev->Get()->Line)) {
+ BecomeDefunct();
+ }
+ }
+
+ void TLoggerActor::BecomeDefunct() {
+ Become(&TThis::StateDefunct);
+ Schedule(WakeupInterval, new TEvents::TEvWakeup);
+ }
+
+ void TLoggerActor::HandleLogComponentLevelRequest(TLogComponentLevelRequest::TPtr& ev, const NActors::TActorContext& ctx) {
+ Metrics->IncLevelRequests();
+ TString explanation;
+ int code = Settings->SetLevel(ev->Get()->Priority, ev->Get()->Component, explanation);
+ ctx.Send(ev->Sender, new TLogComponentLevelResponse(code, explanation));
+ }
+
+ void TLoggerActor::RenderComponentPriorities(IOutputStream& str) {
+ using namespace NLog;
+ HTML(str) {
+ H4() {
+ str << "Priority Settings for the Components";
+ }
+ TABLE_SORTABLE_CLASS("table") {
+ TABLEHEAD() {
+ TABLER() {
+ TABLEH() {
+ str << "Component";
+ }
+ TABLEH() {
+ str << "Level";
+ }
+ TABLEH() {
+ str << "Sampling Level";
+ }
+ TABLEH() {
+ str << "Sampling Rate";
+ }
+ }
+ }
+ TABLEBODY() {
+ for (EComponent i = Settings->MinVal; i < Settings->MaxVal; i++) {
+ auto name = Settings->ComponentName(i);
+ if (!*name)
+ continue;
+ NLog::TComponentSettings componentSettings = Settings->GetComponentSettings(i);
+
+ TABLER() {
+ TABLED() {
+ str << "<a href='logger?c=" << i << "'>" << name << "</a>";
+ }
+ TABLED() {
+ str << PriorityToString(EPrio(componentSettings.Raw.X.Level));
+ }
+ TABLED() {
+ str << PriorityToString(EPrio(componentSettings.Raw.X.SamplingLevel));
+ }
+ TABLED() {
+ str << componentSettings.Raw.X.SamplingRate;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ /*
+ * Logger INFO:
+ * 1. Current priority settings per component
+ * 2. Number of log messages (via actor events and directly)
+ * 3. Number of messages per components, per priority
+ * 4. Log level changes (last N changes)
+ */
+ void TLoggerActor::HandleMonInfo(NMon::TEvHttpInfo::TPtr& ev, const TActorContext& ctx) {
+ const auto& params = ev->Get()->Request.GetParams();
+ NLog::EComponent component = NLog::InvalidComponent;
+ NLog::EPriority priority = NLog::PRI_DEBUG;
+ NLog::EPriority samplingPriority = NLog::PRI_DEBUG;
+ ui32 samplingRate = 0;
+ bool hasComponent = false;
+ bool hasPriority = false;
+ bool hasSamplingPriority = false;
+ bool hasSamplingRate = false;
+ bool hasAllowDrop = false;
+ int allowDrop = 0;
+ if (params.Has("c")) {
+ if (TryFromString(params.Get("c"), component) && (component == NLog::InvalidComponent || Settings->IsValidComponent(component))) {
+ hasComponent = true;
+ if (params.Has("p")) {
+ int rawPriority;
+ if (TryFromString(params.Get("p"), rawPriority) && NLog::TSettings::IsValidPriority((NLog::EPriority)rawPriority)) {
+ priority = (NLog::EPriority)rawPriority;
+ hasPriority = true;
+ }
+ }
+ if (params.Has("sp")) {
+ int rawPriority;
+ if (TryFromString(params.Get("sp"), rawPriority) && NLog::TSettings::IsValidPriority((NLog::EPriority)rawPriority)) {
+ samplingPriority = (NLog::EPriority)rawPriority;
+ hasSamplingPriority = true;
+ }
+ }
+ if (params.Has("sr")) {
+ if (TryFromString(params.Get("sr"), samplingRate)) {
+ hasSamplingRate = true;
+ }
+ }
+ }
+ }
+ if (params.Has("allowdrop")) {
+ if (TryFromString(params.Get("allowdrop"), allowDrop)) {
+ hasAllowDrop = true;
+ }
+ }
+
+ TStringStream str;
+ if (hasComponent && !hasPriority && !hasSamplingPriority && !hasSamplingRate) {
+ NLog::TComponentSettings componentSettings = Settings->GetComponentSettings(component);
+ ui32 samplingRate = componentSettings.Raw.X.SamplingRate;
+ HTML(str) {
+ DIV_CLASS("row") {
+ DIV_CLASS("col-md-12") {
+ H4() {
+ str << "Current log settings for " << Settings->ComponentName(component) << Endl;
+ }
+ UL() {
+ LI() {
+ str << "Priority: "
+ << NLog::PriorityToString(NLog::EPrio(componentSettings.Raw.X.Level));
+ }
+ LI() {
+ str << "Sampling priority: "
+ << NLog::PriorityToString(NLog::EPrio(componentSettings.Raw.X.SamplingLevel));
+ }
+ LI() {
+ str << "Sampling rate: "
+ << samplingRate;
+ }
+ }
+ }
+ }
+
+ DIV_CLASS("row") {
+ DIV_CLASS("col-md-12") {
+ H4() {
+ str << "Change priority" << Endl;
+ }
+ UL() {
+ for (int p = NLog::PRI_EMERG; p <= NLog::PRI_TRACE; ++p) {
+ LI() {
+ str << "<a href='logger?c=" << component << "&p=" << p << "'>"
+ << NLog::PriorityToString(NLog::EPrio(p)) << "</a>";
+ }
+ }
+ }
+ H4() {
+ str << "Change sampling priority" << Endl;
+ }
+ UL() {
+ for (int p = NLog::PRI_EMERG; p <= NLog::PRI_TRACE; ++p) {
+ LI() {
+ str << "<a href='logger?c=" << component << "&sp=" << p << "'>"
+ << NLog::PriorityToString(NLog::EPrio(p)) << "</a>";
+ }
+ }
+ }
+ H4() {
+ str << "Change sampling rate" << Endl;
+ }
+ str << "<form method=\"GET\">" << Endl;
+ str << "Rate: <input type=\"number\" name=\"sr\" value=\"" << samplingRate << "\"/>" << Endl;
+ str << "<input type=\"hidden\" name=\"c\" value=\"" << component << "\">" << Endl;
+ str << "<input class=\"btn btn-primary\" type=\"submit\" value=\"Change\"/>" << Endl;
+ str << "</form>" << Endl;
+ H4() {
+ str << "<a href='logger'>Cancel</a>" << Endl;
+ }
+ }
+ }
+ }
+
+ } else {
+ TString explanation;
+ if (hasComponent && hasPriority) {
+ Settings->SetLevel(priority, component, explanation);
+ }
+ if (hasComponent && hasSamplingPriority) {
+ Settings->SetSamplingLevel(samplingPriority, component, explanation);
+ }
+ if (hasComponent && hasSamplingRate) {
+ Settings->SetSamplingRate(samplingRate, component, explanation);
+ }
+ if (hasAllowDrop) {
+ Settings->SetAllowDrop(allowDrop);
+ }
+
+ HTML(str) {
+ if (!explanation.empty()) {
+ DIV_CLASS("row") {
+ DIV_CLASS("col-md-12 alert alert-info") {
+ str << explanation;
+ }
+ }
+ }
+
+ DIV_CLASS("row") {
+ DIV_CLASS("col-md-6") {
+ RenderComponentPriorities(str);
+ }
+ DIV_CLASS("col-md-6") {
+ H4() {
+ str << "Change priority for all components";
+ }
+ TABLE_CLASS("table table-condensed") {
+ TABLEHEAD() {
+ TABLER() {
+ TABLEH() {
+ str << "Priority";
+ }
+ }
+ }
+ TABLEBODY() {
+ for (int p = NLog::PRI_EMERG; p <= NLog::PRI_TRACE; ++p) {
+ TABLER() {
+ TABLED() {
+                                    str << "<a href='logger?c=-1&p=" << p << "'>"
+ << NLog::PriorityToString(NLog::EPrio(p)) << "</a>";
+ }
+ }
+ }
+ }
+ }
+ H4() {
+ str << "Change sampling priority for all components";
+ }
+ TABLE_CLASS("table table-condensed") {
+ TABLEHEAD() {
+ TABLER() {
+ TABLEH() {
+ str << "Priority";
+ }
+ }
+ }
+ TABLEBODY() {
+ for (int p = NLog::PRI_EMERG; p <= NLog::PRI_TRACE; ++p) {
+ TABLER() {
+ TABLED() {
+                                    str << "<a href='logger?c=-1&sp=" << p << "'>"
+ << NLog::PriorityToString(NLog::EPrio(p)) << "</a>";
+ }
+ }
+ }
+ }
+ }
+ H4() {
+ str << "Change sampling rate for all components";
+ }
+ str << "<form method=\"GET\">" << Endl;
+ str << "Rate: <input type=\"number\" name=\"sr\" value=\"0\"/>" << Endl;
+ str << "<input type=\"hidden\" name=\"c\" value=\"-1\">" << Endl;
+ str << "<input class=\"btn btn-primary\" type=\"submit\" value=\"Change\"/>" << Endl;
+ str << "</form>" << Endl;
+ H4() {
+ str << "Drop log entries in case of overflow: "
+ << (Settings->AllowDrop ? "Enabled" : "Disabled");
+ }
+ str << "<form method=\"GET\">" << Endl;
+ str << "<input type=\"hidden\" name=\"allowdrop\" value=\"" << (Settings->AllowDrop ? "0" : "1") << "\"/>" << Endl;
+ str << "<input class=\"btn btn-primary\" type=\"submit\" value=\"" << (Settings->AllowDrop ? "Disable" : "Enable") << "\"/>" << Endl;
+ str << "</form>" << Endl;
+ }
+ }
+ Metrics->GetOutputHtml(str);
+ }
+ }
+
+ ctx.Send(ev->Sender, new NMon::TEvHttpInfoRes(str.Str()));
+ }
+
+ constexpr size_t TimeBufSize = 512;
+
+ bool TLoggerActor::OutputRecord(TInstant time, NLog::EPrio priority, NLog::EComponent component,
+ const TString& formatted) noexcept try {
+ const auto logPrio = ::ELogPriority(ui16(priority));
+
+ char buf[TimeBufSize];
+ switch (Settings->Format) {
+ case NActors::NLog::TSettings::PLAIN_FULL_FORMAT: {
+ TStringBuilder logRecord;
+ if (Settings->UseLocalTimestamps) {
+ logRecord << FormatLocalTimestamp(time, buf);
+ } else {
+ logRecord << time;
+ }
+ logRecord
+ << Settings->MessagePrefix
+ << " :" << Settings->ComponentName(component)
+ << " " << PriorityToString(priority)
+ << ": " << formatted;
+ LogBackend->WriteData(
+ TLogRecord(logPrio, logRecord.data(), logRecord.size()));
+ } break;
+
+ case NActors::NLog::TSettings::PLAIN_SHORT_FORMAT: {
+ TStringBuilder logRecord;
+ logRecord
+ << Settings->ComponentName(component)
+ << ": " << formatted;
+ LogBackend->WriteData(
+ TLogRecord(logPrio, logRecord.data(), logRecord.size()));
+ } break;
+
+ case NActors::NLog::TSettings::JSON_FORMAT: {
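+                // Illustrative output shape (values depend on settings and build):
+                // {"@timestamp":"...","microseconds":...,"host":"...","cluster":"...",
+                //  "priority":"INFO","npriority":6,"component":"...","tag":"KIKIMR",
+                //  "revision":...,"message":"..."}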
+ NJsonWriter::TBuf json;
+ json.BeginObject()
+ .WriteKey("@timestamp")
+ .WriteString(Settings->UseLocalTimestamps ? FormatLocalTimestamp(time, buf) : time.ToString().data())
+ .WriteKey("microseconds")
+ .WriteULongLong(time.MicroSeconds())
+ .WriteKey("host")
+ .WriteString(Settings->ShortHostName)
+ .WriteKey("cluster")
+ .WriteString(Settings->ClusterName)
+ .WriteKey("priority")
+ .WriteString(PriorityToString(priority))
+ .WriteKey("npriority")
+ .WriteInt((int)priority)
+ .WriteKey("component")
+ .WriteString(Settings->ComponentName(component))
+ .WriteKey("tag")
+ .WriteString("KIKIMR")
+ .WriteKey("revision")
+ .WriteInt(GetProgramSvnRevision())
+ .WriteKey("message")
+ .WriteString(formatted)
+ .EndObject();
+ auto logRecord = json.Str();
+ LogBackend->WriteData(
+ TLogRecord(logPrio, logRecord.data(), logRecord.size()));
+ } break;
+ }
+
+ return true;
+ } catch (...) {
+ return false;
+ }
+
+ void TLoggerActor::HandleLogEventDrop(const NLog::TEvLog::TPtr& ev) {
+ WriteMessageStat(*ev->Get());
+ Metrics->IncDroppedMsgs();
+ }
+
+ void TLoggerActor::HandleWakeup() {
+ Become(&TThis::StateFunc);
+ }
+
+ const char* TLoggerActor::FormatLocalTimestamp(TInstant time, char* buf) {
+ struct tm localTime;
+ time.LocalTime(&localTime);
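+        // yields e.g. "2022-02-07-18-08-42" in local time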
+ int r = strftime(buf, TimeBufSize, "%Y-%m-%d-%H-%M-%S", &localTime);
+ Y_VERIFY(r != 0);
+ return buf;
+ }
+
+ TAutoPtr<TLogBackend> CreateSysLogBackend(const TString& ident,
+ bool logPError, bool logCons) {
+ int flags = 0;
+ if (logPError)
+ flags |= TSysLogBackend::LogPerror;
+ if (logCons)
+ flags |= TSysLogBackend::LogCons;
+
+ return new TSysLogBackend(ident.data(), TSysLogBackend::TSYSLOG_LOCAL1, flags);
+ }
+
+ class TStderrBackend: public TLogBackend {
+ public:
+ TStderrBackend() {
+ }
+ void WriteData(const TLogRecord& rec) override {
+#ifdef _MSC_VER
+ if (IsDebuggerPresent()) {
+ TString x;
+ x.reserve(rec.Len + 2);
+ x.append(rec.Data, rec.Len);
+ x.append('\n');
+ OutputDebugString(x.c_str());
+ }
+#endif
+ bool isOk = false;
+ do {
+ try {
+ TRecordWithNewline r(rec);
+ Cerr.Write(r.Buf.Data(), r.Buf.Filled());
+ isOk = true;
+                } catch (const TSystemError& err) {
+ // Interrupted system call
+ Y_UNUSED(err);
+ }
+ } while (!isOk);
+ }
+
+ void ReopenLog() override {
+ }
+
+ private:
+ const TString Indent;
+ };
+
+ class TLineFileLogBackend: public TFileLogBackend {
+ public:
+ TLineFileLogBackend(const TString& path)
+ : TFileLogBackend(path)
+ {
+ }
+
+ // Append newline after every record
+ void WriteData(const TLogRecord& rec) override {
+ TFileLogBackend::WriteData(TRecordWithNewline(rec));
+ }
+ };
+
+ class TCompositeLogBackend: public TLogBackend {
+ public:
+ TCompositeLogBackend(TVector<TAutoPtr<TLogBackend>>&& underlyingBackends)
+ : UnderlyingBackends(std::move(underlyingBackends))
+ {
+ }
+
+ void WriteData(const TLogRecord& rec) override {
+ for (auto& b: UnderlyingBackends) {
+ b->WriteData(rec);
+ }
+ }
+
+ void ReopenLog() override {
+ }
+
+ private:
+ TVector<TAutoPtr<TLogBackend>> UnderlyingBackends;
+ };
+
+ TAutoPtr<TLogBackend> CreateStderrBackend() {
+ return new TStderrBackend();
+ }
+
+ TAutoPtr<TLogBackend> CreateFileBackend(const TString& fileName) {
+ return new TLineFileLogBackend(fileName);
+ }
+
+ TAutoPtr<TLogBackend> CreateNullBackend() {
+ return new TNullLogBackend();
+ }
+
+ TAutoPtr<TLogBackend> CreateCompositeLogBackend(TVector<TAutoPtr<TLogBackend>>&& underlyingBackends) {
+ return new TCompositeLogBackend(std::move(underlyingBackends));
+ }
+}
diff --git a/library/cpp/actors/core/log.h b/library/cpp/actors/core/log.h
new file mode 100644
index 0000000000..c11a7cf3c1
--- /dev/null
+++ b/library/cpp/actors/core/log.h
@@ -0,0 +1,369 @@
+#pragma once
+
+#include "defs.h"
+
+#include "log_iface.h"
+#include "log_settings.h"
+#include "actorsystem.h"
+#include "events.h"
+#include "event_local.h"
+#include "hfunc.h"
+#include "mon.h"
+
+#include <util/generic/vector.h>
+#include <util/string/printf.h>
+#include <util/string/builder.h>
+#include <library/cpp/logger/all.h>
+#include <library/cpp/monlib/dynamic_counters/counters.h>
+#include <library/cpp/monlib/metrics/metric_registry.h>
+#include <library/cpp/json/writer/json.h>
+#include <library/cpp/svnversion/svnversion.h>
+
+#include <library/cpp/actors/memory_log/memlog.h>
+
+// TODO: limit number of messages per second
+// TODO: make TLogComponentLevelRequest/Response network messages
+
+#define IS_LOG_PRIORITY_ENABLED(actorCtxOrSystem, priority, component) \
+ (static_cast<::NActors::NLog::TSettings*>((actorCtxOrSystem).LoggerSettings()) && \
+ static_cast<::NActors::NLog::TSettings*>((actorCtxOrSystem).LoggerSettings())->Satisfies( \
+ static_cast<::NActors::NLog::EPriority>(priority), \
+ static_cast<::NActors::NLog::EComponent>(component), \
+ 0ull) \
+ )
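+
+// Usage sketch (SOME_COMPONENT stands for any component id registered via
+// NLog::TSettings::Append): guard expensive message construction behind a
+// priority check:
+//   if (IS_LOG_PRIORITY_ENABLED(ctx, NActors::NLog::PRI_TRACE, SOME_COMPONENT)) {
+//       // ... build and log an expensive trace string ...
+//   }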
+
+#define LOG_LOG_SAMPLED_BY(actorCtxOrSystem, priority, component, sampleBy, ...) \
+ do { \
+ ::NActors::NLog::TSettings* mSettings = static_cast<::NActors::NLog::TSettings*>((actorCtxOrSystem).LoggerSettings()); \
+ ::NActors::NLog::EPriority mPriority = static_cast<::NActors::NLog::EPriority>(priority); \
+ ::NActors::NLog::EComponent mComponent = static_cast<::NActors::NLog::EComponent>(component); \
+ if (mSettings && mSettings->Satisfies(mPriority, mComponent, sampleBy)) { \
+ ::NActors::MemLogAdapter( \
+ actorCtxOrSystem, priority, component, __VA_ARGS__); \
+ } \
+ } while (0) /**/
+
+#define LOG_LOG_S_SAMPLED_BY(actorCtxOrSystem, priority, component, sampleBy, stream) \
+ LOG_LOG_SAMPLED_BY(actorCtxOrSystem, priority, component, sampleBy, "%s", [&]() { \
+ TStringBuilder logStringBuilder; \
+ logStringBuilder << stream; \
+ return static_cast<TString>(logStringBuilder); \
+ }().data())
+
+#define LOG_LOG(actorCtxOrSystem, priority, component, ...) LOG_LOG_SAMPLED_BY(actorCtxOrSystem, priority, component, 0ull, __VA_ARGS__)
+#define LOG_LOG_S(actorCtxOrSystem, priority, component, stream) LOG_LOG_S_SAMPLED_BY(actorCtxOrSystem, priority, component, 0ull, stream)
+
+// use these macros for logging via actor system or actor context
+#define LOG_EMERG(actorCtxOrSystem, component, ...) LOG_LOG(actorCtxOrSystem, NActors::NLog::PRI_EMERG, component, __VA_ARGS__)
+#define LOG_ALERT(actorCtxOrSystem, component, ...) LOG_LOG(actorCtxOrSystem, NActors::NLog::PRI_ALERT, component, __VA_ARGS__)
+#define LOG_CRIT(actorCtxOrSystem, component, ...) LOG_LOG(actorCtxOrSystem, NActors::NLog::PRI_CRIT, component, __VA_ARGS__)
+#define LOG_ERROR(actorCtxOrSystem, component, ...) LOG_LOG(actorCtxOrSystem, NActors::NLog::PRI_ERROR, component, __VA_ARGS__)
+#define LOG_WARN(actorCtxOrSystem, component, ...) LOG_LOG(actorCtxOrSystem, NActors::NLog::PRI_WARN, component, __VA_ARGS__)
+#define LOG_NOTICE(actorCtxOrSystem, component, ...) LOG_LOG(actorCtxOrSystem, NActors::NLog::PRI_NOTICE, component, __VA_ARGS__)
+#define LOG_INFO(actorCtxOrSystem, component, ...) LOG_LOG(actorCtxOrSystem, NActors::NLog::PRI_INFO, component, __VA_ARGS__)
+#define LOG_DEBUG(actorCtxOrSystem, component, ...) LOG_LOG(actorCtxOrSystem, NActors::NLog::PRI_DEBUG, component, __VA_ARGS__)
+#define LOG_TRACE(actorCtxOrSystem, component, ...) LOG_LOG(actorCtxOrSystem, NActors::NLog::PRI_TRACE, component, __VA_ARGS__)
+
+#define LOG_EMERG_S(actorCtxOrSystem, component, stream) LOG_LOG_S(actorCtxOrSystem, NActors::NLog::PRI_EMERG, component, stream)
+#define LOG_ALERT_S(actorCtxOrSystem, component, stream) LOG_LOG_S(actorCtxOrSystem, NActors::NLog::PRI_ALERT, component, stream)
+#define LOG_CRIT_S(actorCtxOrSystem, component, stream) LOG_LOG_S(actorCtxOrSystem, NActors::NLog::PRI_CRIT, component, stream)
+#define LOG_ERROR_S(actorCtxOrSystem, component, stream) LOG_LOG_S(actorCtxOrSystem, NActors::NLog::PRI_ERROR, component, stream)
+#define LOG_WARN_S(actorCtxOrSystem, component, stream) LOG_LOG_S(actorCtxOrSystem, NActors::NLog::PRI_WARN, component, stream)
+#define LOG_NOTICE_S(actorCtxOrSystem, component, stream) LOG_LOG_S(actorCtxOrSystem, NActors::NLog::PRI_NOTICE, component, stream)
+#define LOG_INFO_S(actorCtxOrSystem, component, stream) LOG_LOG_S(actorCtxOrSystem, NActors::NLog::PRI_INFO, component, stream)
+#define LOG_DEBUG_S(actorCtxOrSystem, component, stream) LOG_LOG_S(actorCtxOrSystem, NActors::NLog::PRI_DEBUG, component, stream)
+#define LOG_TRACE_S(actorCtxOrSystem, component, stream) LOG_LOG_S(actorCtxOrSystem, NActors::NLog::PRI_TRACE, component, stream)
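+
+// Usage sketch (SOME_COMPONENT is a placeholder for a component id registered
+// in NLog::TSettings):
+//   LOG_INFO(ctx, SOME_COMPONENT, "processed %" PRIu32 " items", count);
+//   LOG_INFO_S(ctx, SOME_COMPONENT, "processed " << count << " items");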
+
+#define LOG_EMERG_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, ...) LOG_LOG_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_EMERG, component, sampleBy, __VA_ARGS__)
+#define LOG_ALERT_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, ...) LOG_LOG_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_ALERT, component, sampleBy, __VA_ARGS__)
+#define LOG_CRIT_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, ...) LOG_LOG_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_CRIT, component, sampleBy, __VA_ARGS__)
+#define LOG_ERROR_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, ...) LOG_LOG_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_ERROR, component, sampleBy, __VA_ARGS__)
+#define LOG_WARN_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, ...) LOG_LOG_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_WARN, component, sampleBy, __VA_ARGS__)
+#define LOG_NOTICE_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, ...) LOG_LOG_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_NOTICE, component, sampleBy, __VA_ARGS__)
+#define LOG_INFO_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, ...) LOG_LOG_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_INFO, component, sampleBy, __VA_ARGS__)
+#define LOG_DEBUG_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, ...) LOG_LOG_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_DEBUG, component, sampleBy, __VA_ARGS__)
+#define LOG_TRACE_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, ...) LOG_LOG_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_TRACE, component, sampleBy, __VA_ARGS__)
+
+#define LOG_EMERG_S_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, stream) LOG_LOG_S_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_EMERG, component, sampleBy, stream)
+#define LOG_ALERT_S_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, stream) LOG_LOG_S_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_ALERT, component, sampleBy, stream)
+#define LOG_CRIT_S_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, stream) LOG_LOG_S_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_CRIT, component, sampleBy, stream)
+#define LOG_ERROR_S_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, stream) LOG_LOG_S_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_ERROR, component, sampleBy, stream)
+#define LOG_WARN_S_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, stream) LOG_LOG_S_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_WARN, component, sampleBy, stream)
+#define LOG_NOTICE_S_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, stream) LOG_LOG_S_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_NOTICE, component, sampleBy, stream)
+#define LOG_INFO_S_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, stream) LOG_LOG_S_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_INFO, component, sampleBy, stream)
+#define LOG_DEBUG_S_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, stream) LOG_LOG_S_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_DEBUG, component, sampleBy, stream)
+#define LOG_TRACE_S_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, stream) LOG_LOG_S_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_TRACE, component, sampleBy, stream)
+
+// Log Throttling
+#define LOG_LOG_THROTTLE(throttler, actorCtxOrSystem, priority, component, ...) \
+ do { \
+ if ((throttler).Kick()) { \
+ LOG_LOG(actorCtxOrSystem, priority, component, __VA_ARGS__); \
+ } \
+ } while (0) /**/
+
+#define TRACE_EVENT(component) \
+ const auto& currentTracer = component; \
+ if (ev->HasEvent()) { \
+ LOG_TRACE(*TlsActivationContext, currentTracer, "%s, received event# %" PRIu32 ", Sender %s, Recipient %s: %s", \
+ __FUNCTION__, ev->Type, ev->Sender.ToString().data(), SelfId().ToString().data(), ev->GetBase()->ToString().substr(0, 1000).data()); \
+ } else { \
+ LOG_TRACE(*TlsActivationContext, currentTracer, "%s, received event# %" PRIu32 ", Sender %s, Recipient %s", \
+ __FUNCTION__, ev->Type, ev->Sender.ToString().data(), ev->Recipient.ToString().data()); \
+ }
+#define TRACE_EVENT_TYPE(eventType) LOG_TRACE(*TlsActivationContext, currentTracer, "%s, processing event %s", __FUNCTION__, eventType)
+
+class TLog;
+class TLogBackend;
+
+namespace NActors {
+ class TLoggerActor;
+
+ ////////////////////////////////////////////////////////////////////////////////
+ // SET LOG LEVEL FOR A COMPONENT
+ ////////////////////////////////////////////////////////////////////////////////
+ class TLogComponentLevelRequest: public TEventLocal<TLogComponentLevelRequest, int(NLog::EEv::LevelReq)> {
+ public:
+ // set given priority for the component
+ TLogComponentLevelRequest(NLog::EPriority priority, NLog::EComponent component)
+ : Priority(priority)
+ , Component(component)
+ {
+ }
+
+ // set given priority for all components
+ TLogComponentLevelRequest(NLog::EPriority priority)
+ : Priority(priority)
+ , Component(NLog::InvalidComponent)
+ {
+ }
+
+ protected:
+ NLog::EPriority Priority;
+ NLog::EComponent Component;
+
+ friend class TLoggerActor;
+ };
+
+ class TLogComponentLevelResponse: public TEventLocal<TLogComponentLevelResponse, int(NLog::EEv::LevelResp)> {
+ public:
+ TLogComponentLevelResponse(int code, const TString& explanation)
+ : Code(code)
+ , Explanation(explanation)
+ {
+ }
+
+ int GetCode() const {
+ return Code;
+ }
+
+ const TString& GetExplanation() const {
+ return Explanation;
+ }
+
+ protected:
+ int Code;
+ TString Explanation;
+ };
+
+ class TLogIgnored: public TEventLocal<TLogIgnored, int(NLog::EEv::Ignored)> {
+ public:
+ TLogIgnored() {
+ }
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////
+ // LOGGER ACTOR
+ ////////////////////////////////////////////////////////////////////////////////
+ class ILoggerMetrics {
+ public:
+ virtual ~ILoggerMetrics() = default;
+
+ virtual void IncActorMsgs() = 0;
+ virtual void IncDirectMsgs() = 0;
+ virtual void IncLevelRequests() = 0;
+ virtual void IncIgnoredMsgs() = 0;
+ virtual void IncAlertMsgs() = 0;
+ virtual void IncEmergMsgs() = 0;
+ virtual void IncDroppedMsgs() = 0;
+
+ virtual void GetOutputHtml(IOutputStream&) = 0;
+ };
+
+ class TLoggerActor: public TActor<TLoggerActor> {
+ public:
+ static constexpr IActor::EActivityType ActorActivityType() {
+ return IActor::LOG_ACTOR;
+ }
+
+ TLoggerActor(TIntrusivePtr<NLog::TSettings> settings,
+ TAutoPtr<TLogBackend> logBackend,
+ TIntrusivePtr<NMonitoring::TDynamicCounters> counters);
+ TLoggerActor(TIntrusivePtr<NLog::TSettings> settings,
+ std::shared_ptr<TLogBackend> logBackend,
+ TIntrusivePtr<NMonitoring::TDynamicCounters> counters);
+ TLoggerActor(TIntrusivePtr<NLog::TSettings> settings,
+ TAutoPtr<TLogBackend> logBackend,
+ std::shared_ptr<NMonitoring::TMetricRegistry> metrics);
+ TLoggerActor(TIntrusivePtr<NLog::TSettings> settings,
+ std::shared_ptr<TLogBackend> logBackend,
+ std::shared_ptr<NMonitoring::TMetricRegistry> metrics);
+ ~TLoggerActor();
+
+ void StateFunc(TAutoPtr<IEventHandle>& ev, const TActorContext& ctx) {
+ switch (ev->GetTypeRewrite()) {
+ HFunc(TLogIgnored, HandleIgnoredEvent);
+ HFunc(NLog::TEvLog, HandleLogEvent);
+ HFunc(TLogComponentLevelRequest, HandleLogComponentLevelRequest);
+ HFunc(NMon::TEvHttpInfo, HandleMonInfo);
+ }
+ }
+
+ STFUNC(StateDefunct) {
+ switch (ev->GetTypeRewrite()) {
+ cFunc(TLogIgnored::EventType, HandleIgnoredEventDrop);
+ hFunc(NLog::TEvLog, HandleLogEventDrop);
+ HFunc(TLogComponentLevelRequest, HandleLogComponentLevelRequest);
+ HFunc(NMon::TEvHttpInfo, HandleMonInfo);
+ cFunc(TEvents::TEvWakeup::EventType, HandleWakeup);
+ }
+ }
+
+ // Directly call logger instead of sending a message
+ void Log(TInstant time, NLog::EPriority priority, NLog::EComponent component, const char* c, ...);
+
+ static void Throttle(const NLog::TSettings& settings);
+
+ private:
+ TIntrusivePtr<NLog::TSettings> Settings;
+ std::shared_ptr<TLogBackend> LogBackend;
+ ui64 IgnoredCount = 0;
+ ui64 PassedCount = 0;
+ static TAtomic IsOverflow;
+ TDuration WakeupInterval{TDuration::Seconds(5)};
+ std::unique_ptr<ILoggerMetrics> Metrics;
+
+ void BecomeDefunct();
+ void HandleIgnoredEvent(TLogIgnored::TPtr& ev, const NActors::TActorContext& ctx);
+ void HandleIgnoredEventDrop();
+ void HandleLogEvent(NLog::TEvLog::TPtr& ev, const TActorContext& ctx);
+ void HandleLogEventDrop(const NLog::TEvLog::TPtr& ev);
+ void HandleLogComponentLevelRequest(TLogComponentLevelRequest::TPtr& ev, const TActorContext& ctx);
+ void HandleMonInfo(NMon::TEvHttpInfo::TPtr& ev, const TActorContext& ctx);
+ void HandleWakeup();
+ [[nodiscard]] bool OutputRecord(TInstant time, NLog::EPrio priority, NLog::EComponent component, const TString& formatted) noexcept;
+ void RenderComponentPriorities(IOutputStream& str);
+ void LogIgnoredCount(TInstant now);
+ void WriteMessageStat(const NLog::TEvLog& ev);
+ static const char* FormatLocalTimestamp(TInstant time, char* buf);
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////
+ // LOG THROTTLING
+    // TTrivialLogThrottler -- logs at most one message per 'period'
+ // Use case:
+ // TTrivialLogThrottler throttler(TDuration::Minutes(1));
+ // ....
+ // LOG_LOG_THROTTLE(throttler, ctx, NActors::NLog::PRI_ERROR, SOME, "Error");
+ ////////////////////////////////////////////////////////////////////////////////
+ class TTrivialLogThrottler {
+ public:
+ TTrivialLogThrottler(TDuration period)
+ : Period(period)
+ {
+ }
+
+ // return value:
+ // true -- write to log
+ // false -- don't write to log, throttle
+ bool Kick() {
+ auto now = TInstant::Now();
+ if (now >= (LastWrite + Period)) {
+ LastWrite = now;
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ private:
+ TInstant LastWrite;
+ TDuration Period;
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////
+ // SYSLOG BACKEND
+ ////////////////////////////////////////////////////////////////////////////////
+ TAutoPtr<TLogBackend> CreateSysLogBackend(const TString& ident,
+ bool logPError, bool logCons);
+ TAutoPtr<TLogBackend> CreateStderrBackend();
+ TAutoPtr<TLogBackend> CreateFileBackend(const TString& fileName);
+ TAutoPtr<TLogBackend> CreateNullBackend();
+ TAutoPtr<TLogBackend> CreateCompositeLogBackend(TVector<TAutoPtr<TLogBackend>>&& underlyingBackends);
+
+ /////////////////////////////////////////////////////////////////////
+ // Logging adaptors for memory log and logging into filesystem
+ /////////////////////////////////////////////////////////////////////
+
+ namespace NDetail {
+ inline void Y_PRINTF_FORMAT(2, 3) PrintfV(TString& dst, const char* format, ...) {
+ va_list params;
+ va_start(params, format);
+ vsprintf(dst, format, params);
+ va_end(params);
+ }
+
+ inline void PrintfV(TString& dst, const char* format, va_list params) {
+ vsprintf(dst, format, params);
+ }
+ } // namespace NDetail
+
+ template <typename TCtx>
+ inline void DeliverLogMessage(TCtx& ctx, NLog::EPriority mPriority, NLog::EComponent mComponent, TString &&str)
+ {
+ const NLog::TSettings *mSettings = ctx.LoggerSettings();
+ TLoggerActor::Throttle(*mSettings);
+ ctx.Send(new IEventHandle(mSettings->LoggerActorId, TActorId(), new NLog::TEvLog(mPriority, mComponent, std::move(str))));
+ }
+
+ template <typename TCtx, typename... TArgs>
+ inline void MemLogAdapter(
+ TCtx& actorCtxOrSystem,
+ NLog::EPriority mPriority,
+ NLog::EComponent mComponent,
+ const char* format, TArgs&&... params) {
+ TString Formatted;
+
+
+ if constexpr (sizeof... (params) > 0) {
+ NDetail::PrintfV(Formatted, format, std::forward<TArgs>(params)...);
+ } else {
+ NDetail::PrintfV(Formatted, "%s", format);
+ }
+
+ MemLogWrite(Formatted.data(), Formatted.size(), true);
+ DeliverLogMessage(actorCtxOrSystem, mPriority, mComponent, std::move(Formatted));
+ }
+
+ template <typename TCtx>
+ Y_WRAPPER inline void MemLogAdapter(
+ TCtx& actorCtxOrSystem,
+ NLog::EPriority mPriority,
+ NLog::EComponent mComponent,
+ const TString& str) {
+
+ MemLogWrite(str.data(), str.size(), true);
+ DeliverLogMessage(actorCtxOrSystem, mPriority, mComponent, TString(str));
+ }
+
+ template <typename TCtx>
+ Y_WRAPPER inline void MemLogAdapter(
+ TCtx& actorCtxOrSystem,
+ NLog::EPriority mPriority,
+ NLog::EComponent mComponent,
+ TString&& str) {
+
+ MemLogWrite(str.data(), str.size(), true);
+ DeliverLogMessage(actorCtxOrSystem, mPriority, mComponent, std::move(str));
+ }
+}
diff --git a/library/cpp/actors/core/log_iface.h b/library/cpp/actors/core/log_iface.h
new file mode 100644
index 0000000000..b331db9ca8
--- /dev/null
+++ b/library/cpp/actors/core/log_iface.h
@@ -0,0 +1,109 @@
+#pragma once
+
+#include "events.h"
+#include "event_local.h"
+
+namespace NActors {
+ namespace NLog {
+ using EComponent = int;
+
+ enum EPriority : ui16 { // migrate it to EPrio whenever possible
+ PRI_EMERG,
+ PRI_ALERT,
+ PRI_CRIT,
+ PRI_ERROR,
+ PRI_WARN,
+ PRI_NOTICE,
+ PRI_INFO,
+ PRI_DEBUG,
+ PRI_TRACE
+ };
+
+ enum class EPrio : ui16 {
+ Emerg = 0,
+ Alert = 1,
+ Crit = 2,
+ Error = 3,
+ Warn = 4,
+ Notice = 5,
+ Info = 6,
+ Debug = 7,
+ Trace = 8,
+ };
+
+ struct TLevel {
+ TLevel(ui32 raw)
+ : Raw(raw)
+ {
+ }
+
+ TLevel(EPrio prio)
+ : Raw((ui16(prio) + 1) << 8)
+ {
+ }
+
+ EPrio ToPrio() const noexcept {
+ const auto major = Raw >> 8;
+
+ return major > 0 ? EPrio(major - 1) : EPrio::Emerg;
+ }
+
+ bool IsUrgentAbortion() const noexcept {
+ return (Raw >> 8) == 0;
+ }
+
+            /* Generalized monotonic level value composed of major and minor
+               levels. Minor is used for verbosity within major; a basic EPrio
+               maps to (EPrio + 1, 0), and Major = 0 is reserved as a special
+               space that means EPrio::Emerg but with extended actions.
+               Thus a logger should map Major = 0 to EPrio::Emerg if it has no
+               idea how to handle the special emergency actions.
+             */
+
+ ui32 Raw = 0; // ((ui16(EPrio) + 1) << 8) | ui8(minor)
+ };
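+
+        // Worked example: TLevel(EPrio::Info).Raw == (ui16(6) + 1) << 8 == 0x700;
+        // ToPrio() then recovers EPrio::Info from the major byte.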
+
+ enum class EEv {
+ Log = EventSpaceBegin(TEvents::ES_LOGGER),
+ LevelReq,
+ LevelResp,
+ Ignored,
+ End
+ };
+
+ static_assert(int(EEv::End) < EventSpaceEnd(TEvents::ES_LOGGER), "");
+
+ class TEvLog: public TEventLocal<TEvLog, int(EEv::Log)> {
+ public:
+ TEvLog(TInstant stamp, TLevel level, EComponent comp, const TString &line)
+ : Stamp(stamp)
+ , Level(level)
+ , Component(comp)
+ , Line(line)
+ {
+ }
+
+ TEvLog(TInstant stamp, TLevel level, EComponent comp, TString &&line)
+ : Stamp(stamp)
+ , Level(level)
+ , Component(comp)
+ , Line(std::move(line))
+ {
+ }
+
+ TEvLog(EPriority prio, EComponent comp, TString line, TInstant time = TInstant::Now())
+ : Stamp(time)
+ , Level(EPrio(prio))
+ , Component(comp)
+ , Line(std::move(line))
+ {
+ }
+
+ const TInstant Stamp = TInstant::Max();
+ const TLevel Level;
+ const EComponent Component = 0;
+ TString Line;
+ };
+
+ }
+}
diff --git a/library/cpp/actors/core/log_settings.cpp b/library/cpp/actors/core/log_settings.cpp
new file mode 100644
index 0000000000..f52f2fc5d2
--- /dev/null
+++ b/library/cpp/actors/core/log_settings.cpp
@@ -0,0 +1,230 @@
+#include "log_settings.h"
+
+#include <util/stream/str.h>
+
+namespace NActors {
+ namespace NLog {
+ TSettings::TSettings(const TActorId& loggerActorId, const EComponent loggerComponent,
+ EComponent minVal, EComponent maxVal, EComponentToStringFunc func,
+ EPriority defPriority, EPriority defSamplingPriority,
+ ui32 defSamplingRate, ui64 timeThresholdMs)
+ : LoggerActorId(loggerActorId)
+ , LoggerComponent(loggerComponent)
+ , TimeThresholdMs(timeThresholdMs)
+ , AllowDrop(true)
+ , ThrottleDelay(TDuration::MilliSeconds(100))
+ , MinVal(0)
+ , MaxVal(0)
+ , Mask(0)
+ , DefPriority(defPriority)
+ , DefSamplingPriority(defSamplingPriority)
+ , DefSamplingRate(defSamplingRate)
+ , UseLocalTimestamps(false)
+ , Format(PLAIN_FULL_FORMAT)
+ , ShortHostName("")
+ , ClusterName("")
+ {
+ Append(minVal, maxVal, func);
+ }
+
+ TSettings::TSettings(const TActorId& loggerActorId, const EComponent loggerComponent,
+ EPriority defPriority, EPriority defSamplingPriority,
+ ui32 defSamplingRate, ui64 timeThresholdMs)
+ : LoggerActorId(loggerActorId)
+ , LoggerComponent(loggerComponent)
+ , TimeThresholdMs(timeThresholdMs)
+ , AllowDrop(true)
+ , ThrottleDelay(TDuration::MilliSeconds(100))
+ , MinVal(0)
+ , MaxVal(0)
+ , Mask(0)
+ , DefPriority(defPriority)
+ , DefSamplingPriority(defSamplingPriority)
+ , DefSamplingRate(defSamplingRate)
+ , UseLocalTimestamps(false)
+ , Format(PLAIN_FULL_FORMAT)
+ , ShortHostName("")
+ , ClusterName("")
+ {
+ }
+
+ void TSettings::Append(EComponent minVal, EComponent maxVal, EComponentToStringFunc func) {
+ Y_VERIFY(minVal >= 0, "NLog::TSettings: minVal must be non-negative");
+ Y_VERIFY(maxVal > minVal, "NLog::TSettings: maxVal must be greater than minVal");
+
+ // update bounds
+ if (!MaxVal || minVal < MinVal) {
+ MinVal = minVal;
+ }
+
+ if (!MaxVal || maxVal > MaxVal) {
+ MaxVal = maxVal;
+
+ // expand ComponentNames to the new bounds
+ auto oldMask = Mask;
+ Mask = PowerOf2Mask(MaxVal);
+
+ TArrayHolder<TAtomic> oldComponentInfo(new TAtomic[Mask + 1]);
+ ComponentInfo.Swap(oldComponentInfo);
+ int startVal = oldMask ? oldMask + 1 : 0;
+ for (int i = 0; i < startVal; i++) {
+ AtomicSet(ComponentInfo[i], AtomicGet(oldComponentInfo[i]));
+ }
+
+ TComponentSettings defSetting(DefPriority, DefSamplingPriority, DefSamplingRate);
+ for (int i = startVal; i < Mask + 1; i++) {
+ AtomicSet(ComponentInfo[i], defSetting.Raw.Data);
+ }
+
+ ComponentNames.resize(Mask + 1);
+ }
+
+ // assign new names but validate if newly added members were not used before
+ for (int i = minVal; i <= maxVal; i++) {
+ Y_VERIFY(!ComponentNames[i], "component name at %d already set: %s",
+ i, ComponentNames[i].data());
+ ComponentNames[i] = func(i);
+ }
+ }
+
+ int TSettings::SetLevelImpl(
+ const TString& name, bool isSampling,
+ EPriority priority, EComponent component, TString& explanation) {
+ TString titleName(name);
+ titleName.to_title();
+
+ // check priority
+ if (!IsValidPriority(priority)) {
+ TStringStream str;
+ str << "Invalid " << name;
+ explanation = str.Str();
+ return 1;
+ }
+
+ if (component == InvalidComponent) {
+ for (int i = 0; i < Mask + 1; i++) {
+ TComponentSettings settings = AtomicGet(ComponentInfo[i]);
+ if (isSampling) {
+ settings.Raw.X.SamplingLevel = priority;
+ } else {
+ settings.Raw.X.Level = priority;
+ }
+ AtomicSet(ComponentInfo[i], settings.Raw.Data);
+ }
+
+ TStringStream str;
+
+ str << titleName
+ << " for all components has been changed to "
+ << PriorityToString(EPrio(priority));
+ explanation = str.Str();
+ return 0;
+ } else {
+ if (!IsValidComponent(component)) {
+ explanation = "Invalid component";
+ return 1;
+ }
+ TComponentSettings settings = AtomicGet(ComponentInfo[component]);
+ EPriority oldPriority;
+ if (isSampling) {
+ oldPriority = (EPriority)settings.Raw.X.SamplingLevel;
+ settings.Raw.X.SamplingLevel = priority;
+ } else {
+ oldPriority = (EPriority)settings.Raw.X.Level;
+ settings.Raw.X.Level = priority;
+ }
+ AtomicSet(ComponentInfo[component], settings.Raw.Data);
+ TStringStream str;
+ str << titleName << " for the component " << ComponentNames[component]
+ << " has been changed from " << PriorityToString(EPrio(oldPriority))
+ << " to " << PriorityToString(EPrio(priority));
+ explanation = str.Str();
+ return 0;
+ }
+ }
+
+ int TSettings::SetLevel(EPriority priority, EComponent component, TString& explanation) {
+ return SetLevelImpl("priority", false,
+ priority, component, explanation);
+ }
+
+ int TSettings::SetSamplingLevel(EPriority priority, EComponent component, TString& explanation) {
+ return SetLevelImpl("sampling priority", true,
+ priority, component, explanation);
+ }
+
+ int TSettings::SetSamplingRate(ui32 sampling, EComponent component, TString& explanation) {
+ if (component == InvalidComponent) {
+ for (int i = 0; i < Mask + 1; i++) {
+ TComponentSettings settings = AtomicGet(ComponentInfo[i]);
+ settings.Raw.X.SamplingRate = sampling;
+ AtomicSet(ComponentInfo[i], settings.Raw.Data);
+ }
+ TStringStream str;
+ str << "Sampling rate for all components has been changed to " << sampling;
+ explanation = str.Str();
+ } else {
+ if (!IsValidComponent(component)) {
+ explanation = "Invalid component";
+ return 1;
+ }
+ TComponentSettings settings = AtomicGet(ComponentInfo[component]);
+ ui32 oldSampling = settings.Raw.X.SamplingRate;
+ settings.Raw.X.SamplingRate = sampling;
+ AtomicSet(ComponentInfo[component], settings.Raw.Data);
+ TStringStream str;
+ str << "Sampling rate for the component " << ComponentNames[component]
+ << " has been changed from " << oldSampling
+ << " to " << sampling;
+ explanation = str.Str();
+ }
+ return 0;
+ }
+
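+        // Returns the smallest all-ones bit mask that covers val,
+        // e.g. PowerOf2Mask(5) == 7 and PowerOf2Mask(8) == 15.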
+ int TSettings::PowerOf2Mask(int val) {
+ int mask = 1;
+ while ((val & mask) != val) {
+ mask <<= 1;
+ mask |= 1;
+ }
+ return mask;
+ }
+
+ bool TSettings::IsValidPriority(EPriority priority) {
+ return priority == PRI_EMERG || priority == PRI_ALERT ||
+ priority == PRI_CRIT || priority == PRI_ERROR ||
+ priority == PRI_WARN || priority == PRI_NOTICE ||
+ priority == PRI_INFO || priority == PRI_DEBUG || priority == PRI_TRACE;
+ }
+
+ bool TSettings::IsValidComponent(EComponent component) {
+ return (MinVal <= component) && (component <= MaxVal) && !ComponentNames[component].empty();
+ }
+
+ void TSettings::SetAllowDrop(bool val) {
+ AllowDrop = val;
+ }
+
+ void TSettings::SetThrottleDelay(TDuration value) {
+ ThrottleDelay = value;
+ }
+
+ void TSettings::SetUseLocalTimestamps(bool value) {
+ UseLocalTimestamps = value;
+ }
+
+ EComponent TSettings::FindComponent(const TStringBuf& componentName) const {
+ if (componentName.empty())
+ return InvalidComponent;
+
+ for (EComponent component = MinVal; component <= MaxVal; ++component) {
+ if (ComponentNames[component] == componentName)
+ return component;
+ }
+
+ return InvalidComponent;
+ }
+
+ }
+
+}
diff --git a/library/cpp/actors/core/log_settings.h b/library/cpp/actors/core/log_settings.h
new file mode 100644
index 0000000000..7fe4504edd
--- /dev/null
+++ b/library/cpp/actors/core/log_settings.h
@@ -0,0 +1,176 @@
+#pragma once
+
+#include "actor.h"
+#include "log_iface.h"
+#include <util/generic/vector.h>
+#include <util/digest/murmur.h>
+#include <util/random/easy.h>
+
+namespace NActors {
+ namespace NLog {
+ inline const char* PriorityToString(EPrio priority) {
+ switch (priority) {
+ case EPrio::Emerg:
+ return "EMERG";
+ case EPrio::Alert:
+ return "ALERT";
+ case EPrio::Crit:
+ return "CRIT";
+ case EPrio::Error:
+ return "ERROR";
+ case EPrio::Warn:
+ return "WARN";
+ case EPrio::Notice:
+ return "NOTICE";
+ case EPrio::Info:
+ return "INFO";
+ case EPrio::Debug:
+ return "DEBUG";
+ case EPrio::Trace:
+ return "TRACE";
+ default:
+ return "UNKNOWN";
+ }
+ }
+
+        // You can structure your program to have multiple logical components.
+        // In that case you can set different log priorities for different
+        // components, and you can change a component's priority while the
+        // system is running. Suspect a component has a bug? Turn the DEBUG
+        // priority level on for that component.
+ static const int InvalidComponent = -1;
+
+        // Function that converts an EComponent id to its string name
+        using EComponentToStringFunc = std::function<const TString&(EComponent)>;
+
+ // Log settings
+ struct TComponentSettings {
+ union {
+ struct {
+ ui32 SamplingRate;
+ ui8 SamplingLevel;
+ ui8 Level;
+ } X;
+
+ ui64 Data;
+ } Raw;
+
+ TComponentSettings(TAtomicBase data) {
+ Raw.Data = (ui64)data;
+ }
+
+ TComponentSettings(ui8 level, ui8 samplingLevel, ui32 samplingRate) {
+ Raw.X.Level = level;
+ Raw.X.SamplingLevel = samplingLevel;
+ Raw.X.SamplingRate = samplingRate;
+ }
+ };
+
+ struct TSettings: public TThrRefBase {
+ public:
+ TActorId LoggerActorId;
+ EComponent LoggerComponent;
+ ui64 TimeThresholdMs;
+ bool AllowDrop;
+ TDuration ThrottleDelay;
+ TArrayHolder<TAtomic> ComponentInfo;
+ TVector<TString> ComponentNames;
+ EComponent MinVal;
+ EComponent MaxVal;
+ EComponent Mask;
+ EPriority DefPriority;
+ EPriority DefSamplingPriority;
+ ui32 DefSamplingRate;
+ bool UseLocalTimestamps;
+
+ enum ELogFormat {
+ PLAIN_FULL_FORMAT,
+ PLAIN_SHORT_FORMAT,
+ JSON_FORMAT
+ };
+ ELogFormat Format;
+ TString ShortHostName;
+ TString ClusterName;
+ TString MessagePrefix;
+
+            // The best way to provide minVal, maxVal and func is to have
+            // a protobuf enumeration of components. In that case protoc
+            // automatically generates YOURTYPE_MIN, YOURTYPE_MAX and
+            // YOURTYPE_Name for you.
+ TSettings(const TActorId& loggerActorId, const EComponent loggerComponent,
+ EComponent minVal, EComponent maxVal, EComponentToStringFunc func,
+ EPriority defPriority, EPriority defSamplingPriority = PRI_DEBUG,
+ ui32 defSamplingRate = 0, ui64 timeThresholdMs = 1000);
+
+ TSettings(const TActorId& loggerActorId, const EComponent loggerComponent,
+ EPriority defPriority, EPriority defSamplingPriority = PRI_DEBUG,
+ ui32 defSamplingRate = 0, ui64 timeThresholdMs = 1000);
+
+ void Append(EComponent minVal, EComponent maxVal, EComponentToStringFunc func);
+
+ template <typename T>
+ void Append(T minVal, T maxVal, const TString& (*func)(T)) {
+ Append(
+ static_cast<EComponent>(minVal),
+ static_cast<EComponent>(maxVal),
+ [func](EComponent c) -> const TString& {
+ return func(static_cast<T>(c));
+ }
+ );
+ }
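+
+            // Usage sketch with a protobuf-generated enum (assuming protoc has
+            // produced EServices_MIN, EServices_MAX and EServices_Name, the
+            // layout this overload expects; the enum name is illustrative):
+            //   settings->Append(EServices_MIN, EServices_MAX, EServices_Name);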
+
+ inline bool Satisfies(EPriority priority, EComponent component, ui64 sampleBy = 0) const {
+ // by using Mask we don't get outside of array boundaries
+ TComponentSettings settings = GetComponentSettings(component);
+ if (priority > settings.Raw.X.Level) {
+ if (priority > settings.Raw.X.SamplingLevel) {
+ return false; // priority > both levels ==> do not log
+ }
+ // priority <= sampling level ==> apply sampling
+ ui32 samplingRate = settings.Raw.X.SamplingRate;
+ if (samplingRate) {
+ ui32 samplingValue = sampleBy ? MurmurHash<ui32>((const char*)&sampleBy, sizeof(sampleBy))
+ : samplingRate != 1 ? RandomNumber<ui32>() : 0;
+ return (samplingValue % samplingRate == 0);
+ } else {
+ // sampling rate not set ==> do not log
+ return false;
+ }
+ } else {
+ // priority <= log level ==> log
+ return true;
+ }
+ }
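+
+            // Example: with Level == PRI_INFO, SamplingLevel == PRI_DEBUG and
+            // SamplingRate == 100, records at PRI_INFO and more severe always
+            // pass, PRI_DEBUG records pass for roughly 1 in 100 calls, and
+            // PRI_TRACE records never pass.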
+
+ inline TComponentSettings GetComponentSettings(EComponent component) const {
+ Y_VERIFY_DEBUG((component & Mask) == component);
+ // by using Mask we don't get outside of array boundaries
+ return TComponentSettings(AtomicGet(ComponentInfo[component & Mask]));
+ }
+
+ const char* ComponentName(EComponent component) const {
+ Y_VERIFY_DEBUG((component & Mask) == component);
+ return ComponentNames[component & Mask].data();
+ }
+
+ int SetLevel(EPriority priority, EComponent component, TString& explanation);
+ int SetSamplingLevel(EPriority priority, EComponent component, TString& explanation);
+ int SetSamplingRate(ui32 sampling, EComponent component, TString& explanation);
+ EComponent FindComponent(const TStringBuf& componentName) const;
+ static int PowerOf2Mask(int val);
+ static bool IsValidPriority(EPriority priority);
+ bool IsValidComponent(EComponent component);
+ void SetAllowDrop(bool val);
+ void SetThrottleDelay(TDuration value);
+ void SetUseLocalTimestamps(bool value);
+
+ private:
+ int SetLevelImpl(
+ const TString& name, bool isSampling,
+ EPriority priority, EComponent component, TString& explanation);
+ };
+
+ }
+
+}
diff --git a/library/cpp/actors/core/log_ut.cpp b/library/cpp/actors/core/log_ut.cpp
new file mode 100644
index 0000000000..09b5f88ea2
--- /dev/null
+++ b/library/cpp/actors/core/log_ut.cpp
@@ -0,0 +1,185 @@
+#include "log.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <library/cpp/actors/testlib/test_runtime.h>
+
+using namespace NMonitoring;
+using namespace NActors;
+using namespace NActors::NLog;
+
+namespace {
+ const TString& ServiceToString(int) {
+ static const TString FAKE{"FAKE"};
+ return FAKE;
+ }
+
+ TIntrusivePtr<TSettings> DefaultSettings() {
+ auto loggerId = TActorId{0, "Logger"};
+ auto s = MakeIntrusive<TSettings>(loggerId, 0, EPriority::PRI_TRACE);
+ s->SetAllowDrop(false);
+ s->Append(0, 1, ServiceToString);
+ return s;
+ }
+
+ TIntrusivePtr<TSettings> DroppingSettings(ui64 timeThresholdMs) {
+ auto loggerId = TActorId{0, "Logger"};
+ auto s = MakeIntrusive<TSettings>(
+ loggerId,
+ 0,
+ EPriority::PRI_TRACE,
+ EPriority::PRI_DEBUG,
+ (ui32)0,
+ timeThresholdMs);
+ s->Append(0, 1, ServiceToString);
+ return s;
+ }
+
+ class TMockBackend: public TLogBackend {
+ public:
+ using TWriteImpl = std::function<void(const TLogRecord&)>;
+ using TReopenImpl = std::function<void()>;
+
+ static void REOPEN_NOP() { }
+
+ TMockBackend(TWriteImpl writeImpl, TReopenImpl reopenImpl = REOPEN_NOP)
+ : WriteImpl_{writeImpl}
+ , ReopenImpl_{reopenImpl}
+ {
+ }
+
+ void WriteData(const TLogRecord& r) override {
+ WriteImpl_(r);
+ }
+
+ void ReopenLog() override { }
+
+ void SetWriteImpl(TWriteImpl writeImpl) {
+ WriteImpl_ = writeImpl;
+ }
+
+ private:
+ TWriteImpl WriteImpl_;
+ TReopenImpl ReopenImpl_;
+ };
+
+ void ThrowAlways(const TLogRecord&) {
+ ythrow yexception();
+    }
+
+ struct TFixture {
+ TFixture(
+ TIntrusivePtr<TSettings> settings,
+ TMockBackend::TWriteImpl writeImpl = ThrowAlways)
+ {
+ Runtime.Initialize();
+ LogBackend.reset(new TMockBackend{writeImpl});
+ LoggerActor = Runtime.Register(new TLoggerActor{settings, LogBackend, Counters});
+ Runtime.SetScheduledEventFilter([] (auto&&, auto&&, auto&&, auto) {
+ return false;
+ });
+ }
+
+ TFixture(TMockBackend::TWriteImpl writeImpl = ThrowAlways)
+ : TFixture(DefaultSettings(), writeImpl)
+ {}
+
+ void WriteLog() {
+ Runtime.Send(new IEventHandle{LoggerActor, {}, new TEvLog(TInstant::Zero(), TLevel{EPrio::Emerg}, 0, "foo")});
+ }
+
+ void WriteLog(TInstant ts) {
+ Runtime.Send(new IEventHandle{LoggerActor, {}, new TEvLog(ts, TLevel{EPrio::Emerg}, 0, "foo")});
+ }
+
+ void Wakeup() {
+ Runtime.Send(new IEventHandle{LoggerActor, {}, new TEvents::TEvWakeup});
+ }
+
+ TIntrusivePtr<TDynamicCounters> Counters{MakeIntrusive<TDynamicCounters>()};
+ std::shared_ptr<TMockBackend> LogBackend;
+ TActorId LoggerActor;
+ TTestActorRuntimeBase Runtime;
+ };
+}
+
+
+Y_UNIT_TEST_SUITE(TLoggerActorTest) {
+ Y_UNIT_TEST(NoCrashOnWriteFailure) {
+ TFixture test;
+ test.WriteLog();
+ // everything is okay as long as we get here
+ }
+
+ Y_UNIT_TEST(SubsequentWritesAreIgnored) {
+ size_t count{0};
+ auto countWrites = [&count] (auto&& r) {
+ count++;
+ ThrowAlways(r);
+ };
+
+ TFixture test{countWrites};
+ test.WriteLog();
+ UNIT_ASSERT_VALUES_EQUAL(count, 1);
+
+ // at this point we should have started dropping messages
+ for (auto i = 0; i < 5; ++i) {
+ test.WriteLog();
+ }
+
+ UNIT_ASSERT_VALUES_EQUAL(count, 1);
+ }
+
+ Y_UNIT_TEST(LoggerCanRecover) {
+ TFixture test;
+ test.WriteLog();
+
+ TVector<TString> messages;
+ auto acceptWrites = [&] (const TLogRecord& r) {
+ messages.emplace_back(r.Data, r.Len);
+ };
+
+ auto scheduled = test.Runtime.CaptureScheduledEvents();
+ UNIT_ASSERT_VALUES_EQUAL(scheduled.size(), 1);
+
+ test.LogBackend->SetWriteImpl(acceptWrites);
+ test.Wakeup();
+
+ const auto COUNT = 10;
+ for (auto i = 0; i < COUNT; ++i) {
+ test.WriteLog();
+ }
+
+ UNIT_ASSERT_VALUES_EQUAL(messages.size(), COUNT);
+ }
+
+ Y_UNIT_TEST(ShouldObeyTimeThresholdMsWhenOverloaded) {
+ TFixture test{DroppingSettings(5000)};
+
+ TVector<TString> messages;
+ auto acceptWrites = [&] (const TLogRecord& r) {
+ messages.emplace_back(r.Data, r.Len);
+ };
+
+ test.LogBackend->SetWriteImpl(acceptWrites);
+ test.Wakeup();
+
+ const auto COUNT = 11;
+ for (auto i = 0; i < COUNT; ++i) {
+ test.WriteLog();
+ }
+
+ UNIT_ASSERT_VALUES_EQUAL(messages.size(), COUNT);
+
+ test.Runtime.AdvanceCurrentTime(TDuration::Seconds(20));
+ auto now = test.Runtime.GetCurrentTime();
+
+ test.WriteLog(now - TDuration::Seconds(5));
+
+ UNIT_ASSERT_VALUES_EQUAL(messages.size(), COUNT + 1);
+
+ test.WriteLog(now - TDuration::Seconds(6));
+
+ UNIT_ASSERT_VALUES_EQUAL(messages.size(), COUNT + 1);
+ }
+}
diff --git a/library/cpp/actors/core/mailbox.cpp b/library/cpp/actors/core/mailbox.cpp
new file mode 100644
index 0000000000..d84b4f9e46
--- /dev/null
+++ b/library/cpp/actors/core/mailbox.cpp
@@ -0,0 +1,551 @@
+#include "mailbox.h"
+#include "actorsystem.h"
+
+#include <library/cpp/actors/util/datetime.h>
+
+#include <util/system/sanitizers.h>
+
+namespace NActors {
+ TMailboxTable::TMailboxTable()
+ : LastAllocatedLine(0)
+ , AllocatedMailboxCount(0)
+ , CachedSimpleMailboxes(0)
+ , CachedRevolvingMailboxes(0)
+ , CachedHTSwapMailboxes(0)
+ , CachedReadAsFilledMailboxes(0)
+ , CachedTinyReadAsFilledMailboxes(0)
+ {
+ memset((void*)Lines, 0, sizeof(Lines));
+ }
+
+ bool IsGoodForCleanup(const TMailboxHeader* header) {
+ switch (AtomicLoad(&header->ExecutionState)) {
+ case TMailboxHeader::TExecutionState::Inactive:
+ case TMailboxHeader::TExecutionState::Scheduled:
+ return true;
+ case TMailboxHeader::TExecutionState::Leaving:
+ case TMailboxHeader::TExecutionState::Executing:
+ case TMailboxHeader::TExecutionState::LeavingMarked:
+ return false;
+ case TMailboxHeader::TExecutionState::Free:
+ case TMailboxHeader::TExecutionState::FreeScheduled:
+ return true;
+ case TMailboxHeader::TExecutionState::FreeLeaving:
+ case TMailboxHeader::TExecutionState::FreeExecuting:
+ case TMailboxHeader::TExecutionState::FreeLeavingMarked:
+ return false;
+ default:
+ Y_FAIL();
+ }
+ }
+
+ template <typename TMailbox>
+ void DestructMailboxLine(ui8* begin, ui8* end) {
+ const ui32 sx = TMailbox::AlignedSize();
+ for (ui8* x = begin; x + sx <= end; x += sx) {
+ TMailbox* mailbox = reinterpret_cast<TMailbox*>(x);
+ Y_VERIFY(IsGoodForCleanup(mailbox));
+ mailbox->ExecutionState = Max<ui32>();
+ mailbox->~TMailbox();
+ }
+ }
+
+ template <typename TMailbox>
+ bool CleanupMailboxLine(ui8* begin, ui8* end) {
+ const ui32 sx = TMailbox::AlignedSize();
+ bool done = true;
+ for (ui8* x = begin; x + sx <= end; x += sx) {
+ TMailbox* mailbox = reinterpret_cast<TMailbox*>(x);
+ Y_VERIFY(IsGoodForCleanup(mailbox));
+ done &= mailbox->CleanupActors() && mailbox->CleanupEvents();
+ }
+ return done;
+ }
+
+ TMailboxTable::~TMailboxTable() {
+        // on destruction we must traverse every line and free everything
+ for (ui32 i = 0; i < LastAllocatedLine; ++i) {
+ if (TMailboxLineHeader* lineHeader = Lines[i]) {
+ switch (lineHeader->MailboxType) {
+ case TMailboxType::Simple:
+ DestructMailboxLine<TSimpleMailbox>((ui8*)lineHeader + 64, (ui8*)lineHeader + LineSize);
+ break;
+ case TMailboxType::Revolving:
+ DestructMailboxLine<TRevolvingMailbox>((ui8*)lineHeader + 64, (ui8*)lineHeader + LineSize);
+ break;
+ case TMailboxType::HTSwap:
+ DestructMailboxLine<THTSwapMailbox>((ui8*)lineHeader + 64, (ui8*)lineHeader + LineSize);
+ break;
+ case TMailboxType::ReadAsFilled:
+ DestructMailboxLine<TReadAsFilledMailbox>((ui8*)lineHeader + 64, (ui8*)lineHeader + LineSize);
+ break;
+ case TMailboxType::TinyReadAsFilled:
+ DestructMailboxLine<TTinyReadAsFilledMailbox>((ui8*)lineHeader + 64, (ui8*)lineHeader + LineSize);
+ break;
+ default:
+ Y_FAIL();
+ }
+
+ lineHeader->~TMailboxLineHeader();
+ free(lineHeader);
+ Lines[i] = nullptr;
+ }
+ }
+
+ while (MailboxCacheSimple.Pop(0))
+ ;
+ while (MailboxCacheRevolving.Pop(0))
+ ;
+ while (MailboxCacheHTSwap.Pop(0))
+ ;
+ while (MailboxCacheReadAsFilled.Pop(0))
+ ;
+ while (MailboxCacheTinyReadAsFilled.Pop(0))
+ ;
+ }
+
+ bool TMailboxTable::Cleanup() {
+ bool done = true;
+ for (ui32 i = 0; i < LastAllocatedLine; ++i) {
+ if (TMailboxLineHeader* lineHeader = Lines[i]) {
+ switch (lineHeader->MailboxType) {
+ case TMailboxType::Simple:
+ done &= CleanupMailboxLine<TSimpleMailbox>((ui8*)lineHeader + 64, (ui8*)lineHeader + LineSize);
+ break;
+ case TMailboxType::Revolving:
+ done &= CleanupMailboxLine<TRevolvingMailbox>((ui8*)lineHeader + 64, (ui8*)lineHeader + LineSize);
+ break;
+ case TMailboxType::HTSwap:
+ done &= CleanupMailboxLine<THTSwapMailbox>((ui8*)lineHeader + 64, (ui8*)lineHeader + LineSize);
+ break;
+ case TMailboxType::ReadAsFilled:
+ done &= CleanupMailboxLine<TReadAsFilledMailbox>((ui8*)lineHeader + 64, (ui8*)lineHeader + LineSize);
+ break;
+ case TMailboxType::TinyReadAsFilled:
+ done &= CleanupMailboxLine<TTinyReadAsFilledMailbox>((ui8*)lineHeader + 64, (ui8*)lineHeader + LineSize);
+ break;
+ default:
+ Y_FAIL();
+ }
+ }
+ }
+ return done;
+ }
+
+ TMailboxHeader* TMailboxTable::Get(ui32 hint) {
+        // decompose the hint: the high bits select the line, the low bits select
+        // the 1-based slot within that line (slot 0 is reserved and means "invalid")
+        const ui32 lineIndex = (hint & LineIndexMask) >> LineIndexShift;
+        const ui32 lineHint = hint & LineHintMask;
+
+ Y_VERIFY((lineIndex < MaxLines) && (lineHint < LineSize / 64));
+ if (lineHint == 0)
+ return nullptr;
+
+ if (TMailboxLineHeader* const x = AtomicLoad(Lines + lineIndex)) {
+ switch (x->MailboxType) {
+ case TMailboxType::Simple:
+ return TSimpleMailbox::Get(lineHint, x);
+ case TMailboxType::Revolving:
+ return TRevolvingMailbox::Get(lineHint, x);
+ case TMailboxType::HTSwap:
+ return THTSwapMailbox::Get(lineHint, x);
+ case TMailboxType::ReadAsFilled:
+ return TReadAsFilledMailbox::Get(lineHint, x);
+ case TMailboxType::TinyReadAsFilled:
+ return TTinyReadAsFilledMailbox::Get(lineHint, x);
+ default:
+ Y_VERIFY_DEBUG(false);
+ break;
+ }
+ }
+
+ return nullptr;
+ }
+
+ bool TMailboxTable::SendTo(TAutoPtr<IEventHandle>& ev, IExecutorPool* executorPool) {
+ const TActorId& recipient = ev->GetRecipientRewrite();
+ const ui32 hint = recipient.Hint();
+
+ // copy-paste from Get to avoid duplicated type-switches
+ const ui32 lineIndex = (hint & LineIndexMask) >> LineIndexShift;
+ const ui32 lineHint = hint & LineHintMask;
+
+ Y_VERIFY((lineIndex < MaxLines) && (lineHint < LineSize / 64));
+ if (lineHint == 0)
+ return false;
+
+ if (TMailboxLineHeader* const x = AtomicLoad(Lines + lineIndex)) {
+ switch (x->MailboxType) {
+ case TMailboxType::Simple: {
+ TSimpleMailbox* const mailbox = TSimpleMailbox::Get(lineHint, x);
+#if (!defined(_tsan_enabled_))
+ Y_VERIFY_DEBUG(mailbox->Type == (ui32)x->MailboxType);
+#endif
+ mailbox->Queue.Push(ev.Release());
+ if (mailbox->MarkForSchedule()) {
+ RelaxedStore<NHPTimer::STime>(&mailbox->ScheduleMoment, GetCycleCountFast());
+ executorPool->ScheduleActivation(hint);
+ }
+ }
+ return true;
+ case TMailboxType::Revolving: {
+                    // The actorid could be stale and coming from a different machine. If the local process
+                    // has restarted, then a stale actorid coming from a remote machine might reference an
+                    // actor with a simple mailbox, which is smaller than a revolving mailbox. In that case
+                    // the 'lineHint' index might be greater than the actual array size. Normally it is OK
+                    // to store a stale event into another actor's valid mailbox, because Receive will
+                    // compare the receiver actor id and discard the stale event. But here we should discard
+                    // the event right away instead of trying to enqueue it to a mailbox at an invalid address.
+                    // NOTE: lineHint is 1-based
+ static_assert(TSimpleMailbox::AlignedSize() <= TRevolvingMailbox::AlignedSize(),
+ "We expect that one line can store more simple mailboxes than revolving mailboxes");
+ if (lineHint > TRevolvingMailbox::MaxMailboxesInLine())
+ return false;
+
+ TRevolvingMailbox* const mailbox = TRevolvingMailbox::Get(lineHint, x);
+#if (!defined(_tsan_enabled_))
+ Y_VERIFY_DEBUG(mailbox->Type == (ui32)x->MailboxType);
+#endif
+ mailbox->QueueWriter.Push(ev.Release());
+ if (mailbox->MarkForSchedule()) {
+ RelaxedStore<NHPTimer::STime>(&mailbox->ScheduleMoment, GetCycleCountFast());
+ executorPool->ScheduleActivation(hint);
+ }
+ }
+ return true;
+ case TMailboxType::HTSwap: {
+ THTSwapMailbox* const mailbox = THTSwapMailbox::Get(lineHint, x);
+#if (!defined(_tsan_enabled_))
+ Y_VERIFY_DEBUG(mailbox->Type == (ui32)x->MailboxType);
+#endif
+ mailbox->Queue.Push(ev.Release());
+ if (mailbox->MarkForSchedule()) {
+ RelaxedStore<NHPTimer::STime>(&mailbox->ScheduleMoment, GetCycleCountFast());
+ executorPool->ScheduleActivation(hint);
+ }
+ }
+ return true;
+ case TMailboxType::ReadAsFilled: {
+ if (lineHint > TReadAsFilledMailbox::MaxMailboxesInLine())
+ return false;
+
+ TReadAsFilledMailbox* const mailbox = TReadAsFilledMailbox::Get(lineHint, x);
+#if (!defined(_tsan_enabled_))
+ Y_VERIFY_DEBUG(mailbox->Type == (ui32)x->MailboxType);
+#endif
+ mailbox->Queue.Push(ev.Release());
+ if (mailbox->MarkForSchedule()) {
+ RelaxedStore<NHPTimer::STime>(&mailbox->ScheduleMoment, GetCycleCountFast());
+ executorPool->ScheduleActivation(hint);
+ }
+ }
+ return true;
+ case TMailboxType::TinyReadAsFilled: {
+ if (lineHint > TTinyReadAsFilledMailbox::MaxMailboxesInLine())
+ return false;
+
+ TTinyReadAsFilledMailbox* const mailbox = TTinyReadAsFilledMailbox::Get(lineHint, x);
+#if (!defined(_tsan_enabled_))
+ Y_VERIFY_DEBUG(mailbox->Type == (ui32)x->MailboxType);
+#endif
+ mailbox->Queue.Push(ev.Release());
+ if (mailbox->MarkForSchedule()) {
+ RelaxedStore<NHPTimer::STime>(&mailbox->ScheduleMoment, GetCycleCountFast());
+ executorPool->ScheduleActivation(hint);
+ }
+ }
+ return true;
+ default:
+ Y_FAIL("unknown mailbox type");
+ }
+ }
+
+ return false;
+ }
+
+ ui32 TMailboxTable::AllocateMailbox(TMailboxType::EType type, ui64 revolvingCounter) {
+ ui32 x = TryAllocateMailbox(type, revolvingCounter);
+ if (x == 0)
+ x = AllocateNewLine(type);
+ return x;
+ }
+
+ ui32 TMailboxTable::TryAllocateMailbox(TMailboxType::EType type, ui64 revolvingCounter) {
+ switch (type) {
+ case TMailboxType::Simple:
+ do {
+ if (ui32 ret = MailboxCacheSimple.Pop(revolvingCounter)) {
+ AtomicDecrement(CachedSimpleMailboxes);
+ return ret;
+ }
+ } while (AtomicGet(CachedSimpleMailboxes) > (MailboxCacheSimple.Concurrency * 512));
+ return 0;
+ case TMailboxType::Revolving:
+ do {
+ if (ui32 ret = MailboxCacheRevolving.Pop(revolvingCounter)) {
+ AtomicDecrement(CachedRevolvingMailboxes);
+ return ret;
+ }
+ } while (AtomicGet(CachedRevolvingMailboxes) > (MailboxCacheRevolving.Concurrency * 512));
+ return 0;
+ case TMailboxType::HTSwap:
+ do {
+ if (ui32 ret = MailboxCacheHTSwap.Pop(revolvingCounter)) {
+ AtomicDecrement(CachedHTSwapMailboxes);
+ return ret;
+ }
+ } while (AtomicGet(CachedHTSwapMailboxes) > (MailboxCacheHTSwap.Concurrency * 512));
+ return 0;
+ case TMailboxType::ReadAsFilled:
+ do {
+ if (ui32 ret = MailboxCacheReadAsFilled.Pop(revolvingCounter)) {
+ AtomicDecrement(CachedReadAsFilledMailboxes);
+ return ret;
+ }
+ } while (AtomicGet(CachedReadAsFilledMailboxes) > (MailboxCacheReadAsFilled.Concurrency * 512));
+ return 0;
+ case TMailboxType::TinyReadAsFilled:
+ do {
+ if (ui32 ret = MailboxCacheTinyReadAsFilled.Pop(revolvingCounter)) {
+ AtomicDecrement(CachedTinyReadAsFilledMailboxes);
+ return ret;
+ }
+ } while (AtomicGet(CachedTinyReadAsFilledMailboxes) > (MailboxCacheTinyReadAsFilled.Concurrency * 512));
+ return 0;
+ default:
+ Y_FAIL("Unknown mailbox type");
+ }
+ }
+
+ void TMailboxTable::ReclaimMailbox(TMailboxType::EType type, ui32 hint, ui64 revolvingCounter) {
+ if (hint != 0) {
+ switch (type) {
+ case TMailboxType::Simple:
+ MailboxCacheSimple.Push(hint, revolvingCounter);
+ AtomicIncrement(CachedSimpleMailboxes);
+ break;
+ case TMailboxType::Revolving:
+ MailboxCacheRevolving.Push(hint, revolvingCounter);
+ AtomicIncrement(CachedRevolvingMailboxes);
+ break;
+ case TMailboxType::HTSwap:
+ MailboxCacheHTSwap.Push(hint, revolvingCounter);
+ AtomicIncrement(CachedHTSwapMailboxes);
+ break;
+ case TMailboxType::ReadAsFilled:
+ MailboxCacheReadAsFilled.Push(hint, revolvingCounter);
+ AtomicIncrement(CachedReadAsFilledMailboxes);
+ break;
+ case TMailboxType::TinyReadAsFilled:
+ MailboxCacheTinyReadAsFilled.Push(hint, revolvingCounter);
+ AtomicIncrement(CachedTinyReadAsFilledMailboxes);
+ break;
+ default:
+ Y_FAIL();
+ }
+ }
+ }
+
+ TMailboxHeader::TMailboxHeader(TMailboxType::EType type)
+ : ExecutionState(TExecutionState::Free)
+ , Reserved(0)
+ , Type(type)
+ , ActorPack(TMailboxActorPack::Simple)
+ , Knobs(0)
+ {
+ ActorsInfo.Simple.ActorId = 0;
+ ActorsInfo.Simple.Actor = nullptr;
+ }
+
+ TMailboxHeader::~TMailboxHeader() {
+ CleanupActors();
+ }
+
+ bool TMailboxHeader::CleanupActors() {
+ bool done = true;
+ switch (ActorPack) {
+ case TMailboxActorPack::Simple: {
+ if (ActorsInfo.Simple.ActorId != 0) {
+ delete ActorsInfo.Simple.Actor;
+ done = false;
+ }
+ break;
+ }
+ case TMailboxActorPack::Map: {
+ for (auto& [actorId, actor] : *ActorsInfo.Map.ActorsMap) {
+ delete actor;
+ }
+ delete ActorsInfo.Map.ActorsMap;
+ done = false;
+ break;
+ }
+ case TMailboxActorPack::Array: {
+ for (ui64 i = 0; i < ActorsInfo.Array.ActorsCount; ++i) {
+ delete ActorsInfo.Array.ActorsArray->Actors[i].Actor;
+ }
+ delete ActorsInfo.Array.ActorsArray;
+ done = false;
+ break;
+ }
+ }
+ ActorPack = TMailboxActorPack::Simple;
+ ActorsInfo.Simple.ActorId = 0;
+ ActorsInfo.Simple.Actor = nullptr;
+ return done;
+ }
+
+ std::pair<ui32, ui32> TMailboxHeader::CountMailboxEvents(ui64 localActorId, ui32 maxTraverse) {
+ switch (Type) {
+ case TMailboxType::Simple:
+ return static_cast<TMailboxTable::TSimpleMailbox*>(this)->CountSimpleMailboxEvents(localActorId, maxTraverse);
+ case TMailboxType::Revolving:
+ return static_cast<TMailboxTable::TRevolvingMailbox*>(this)->CountRevolvingMailboxEvents(localActorId, maxTraverse);
+ default:
+ return {0, 0};
+ }
+ }
+
+ TMailboxTable::TSimpleMailbox::TSimpleMailbox()
+ : TMailboxHeader(TMailboxType::Simple)
+ , ScheduleMoment(0)
+ {
+ }
+
+ TMailboxTable::TSimpleMailbox::~TSimpleMailbox() {
+ CleanupEvents();
+ }
+
+ bool TMailboxTable::TSimpleMailbox::CleanupEvents() {
+ const bool done = (Queue.Head() == nullptr);
+ while (IEventHandle* ev = Queue.Pop())
+ delete ev;
+ return done;
+ }
+
+ std::pair<ui32, ui32> TMailboxTable::TSimpleMailbox::CountSimpleMailboxEvents(ui64 localActorId, ui32 maxTraverse) {
+ ui32 local = 0;
+ ui32 total = 0;
+
+ auto it = Queue.ReadIterator();
+ while (IEventHandle* x = it.Next()) {
+ ++total;
+ if (x->GetRecipientRewrite().LocalId() == localActorId)
+ ++local;
+ if (total >= maxTraverse)
+ break;
+ }
+
+ return std::make_pair(local, total);
+ }
+
+ TMailboxTable::TRevolvingMailbox::TRevolvingMailbox()
+ : TMailboxHeader(TMailboxType::Revolving)
+ , QueueWriter(QueueReader)
+ , Reserved1(0)
+ , Reserved2(0)
+ , ScheduleMoment(0)
+ {
+ }
+
+ TMailboxTable::TRevolvingMailbox::~TRevolvingMailbox() {
+ CleanupEvents();
+ }
+
+ bool TMailboxTable::TRevolvingMailbox::CleanupEvents() {
+ const bool done = (QueueReader.Head() == nullptr);
+ while (IEventHandle* ev = QueueReader.Pop())
+ delete ev;
+ return done;
+ }
+
+ std::pair<ui32, ui32> TMailboxTable::TRevolvingMailbox::CountRevolvingMailboxEvents(ui64 localActorId, ui32 maxTraverse) {
+ ui32 local = 0;
+ ui32 total = 0;
+
+ auto it = QueueReader.Iterator();
+
+ while (IEventHandle* x = it.Next()) {
+ ++total;
+ if (x->GetRecipientRewrite().LocalId() == localActorId)
+ ++local;
+ if (total >= maxTraverse)
+ break;
+ }
+
+ return std::make_pair(local, total);
+ }
+
+ template <typename T>
+ static ui32 InitNewLine(ui8* x, ui8* end) {
+ const ui32 sx = T::AlignedSize();
+
+ for (ui32 index = 1; x + sx <= end; x += sx, ++index)
+ ::new (x) T();
+
+ return sx;
+ }
+
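+    // Carves a fresh line (LineSize bytes) into cache-line-aligned mailboxes of
+    // the requested type: mailbox index 1 is handed back to the caller, the rest
+    // are pushed into the per-type free cache in bulk.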
+ ui32 TMailboxTable::AllocateNewLine(TMailboxType::EType type) {
+ ui8* ptr = (ui8*)malloc(LineSize);
+ ui8* end = ptr + LineSize;
+
+        const ui32 lineIndex = (ui32)AtomicIncrement(LastAllocatedLine) - 1;
+        Y_VERIFY(lineIndex < MaxLines, "mailbox line limit exhausted");
+        const ui32 lineIndexMask = (lineIndex << LineIndexShift) & LineIndexMask;
+
+        // the first 64 bytes are the TMailboxLineHeader; the mailboxes follow
+ TMailboxLineHeader* header = ::new (ptr) TMailboxLineHeader(type, lineIndex);
+
+ ui8* x = ptr + 64;
+ ui32 sx = 0;
+ TMailboxCache* cache = nullptr;
+ TAtomic* counter = nullptr;
+
+ switch (type) {
+ case TMailboxType::Simple:
+ sx = InitNewLine<TSimpleMailbox>(x, end);
+ cache = &MailboxCacheSimple;
+ counter = &CachedSimpleMailboxes;
+ break;
+ case TMailboxType::Revolving:
+ sx = InitNewLine<TRevolvingMailbox>(x, end);
+ cache = &MailboxCacheRevolving;
+ counter = &CachedRevolvingMailboxes;
+ break;
+ case TMailboxType::HTSwap:
+ sx = InitNewLine<THTSwapMailbox>(x, end);
+ cache = &MailboxCacheHTSwap;
+ counter = &CachedHTSwapMailboxes;
+ break;
+ case TMailboxType::ReadAsFilled:
+ sx = InitNewLine<TReadAsFilledMailbox>(x, end);
+ cache = &MailboxCacheReadAsFilled;
+ counter = &CachedReadAsFilledMailboxes;
+ break;
+ case TMailboxType::TinyReadAsFilled:
+ sx = InitNewLine<TTinyReadAsFilledMailbox>(x, end);
+ cache = &MailboxCacheTinyReadAsFilled;
+ counter = &CachedTinyReadAsFilledMailboxes;
+ break;
+ default:
+ Y_FAIL();
+ }
+
+ AtomicStore(Lines + lineIndex, header);
+
+ ui32 ret = lineIndexMask | 1;
+
+ ui32 index = 2;
+ for (ui32 endIndex = LineSize / sx; index != endIndex;) {
+ const ui32 bufSize = 8;
+ ui32 buf[bufSize];
+ ui32 bufIndex;
+ for (bufIndex = 0; index != endIndex && bufIndex != bufSize; ++bufIndex, ++index)
+ buf[bufIndex] = lineIndexMask | index;
+ cache->PushBulk(buf, bufIndex, index);
+ AtomicAdd(*counter, bufIndex);
+ }
+
+ AtomicAdd(AllocatedMailboxCount, index - 1);
+
+ return ret;
+ }
+}
diff --git a/library/cpp/actors/core/mailbox.h b/library/cpp/actors/core/mailbox.h
new file mode 100644
index 0000000000..0bd9c4d314
--- /dev/null
+++ b/library/cpp/actors/core/mailbox.h
@@ -0,0 +1,553 @@
+#pragma once
+
+#include "defs.h"
+#include "event.h"
+#include "actor.h"
+#include "mailbox_queue_simple.h"
+#include "mailbox_queue_revolving.h"
+#include <library/cpp/actors/util/unordered_cache.h>
+#include <library/cpp/threading/queue/mpsc_htswap.h>
+#include <library/cpp/threading/queue/mpsc_read_as_filled.h>
+#include <util/generic/hash.h>
+#include <util/system/hp_timer.h>
+#include <util/generic/ptr.h>
+// TODO: clean all broken arcadia atomic stuff and replace with intrinsics
+
+namespace NActors {
+ class IActor;
+ class IExecutorPool;
+
+ const ui64 ARRAY_CAPACITY = 8;
+
+    // structure of hint:
+    // 1 bit: service or direct hint flag
+    // 2 bits: pool index
+    // 17 bits: line index
+    // 12 bits: mailbox index within the line
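+    // e.g. (illustrative): with LineIndexShift = 12, a direct hint of
+    // ((5u << 12) | 3u) addresses mailbox 3 of line 5 within its pool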
+
+ struct TMailboxHeader {
+ struct TMailboxActorPack {
+ enum EType {
+ Simple = 0,
+ Array = 1,
+ Map = 2
+ };
+ };
+
+ using TActorMap = THashMap<ui64, IActor*>;
+
+ struct TExecutionState {
+ enum EState {
+ // normal states
+ Inactive = 0,
+ Scheduled = 1,
+ Leaving = 2,
+ Executing = 3,
+ LeavingMarked = 4,
+ // states for free mailboxes (they can still be scheduled so we need duplicates)
+ Free = 5,
+ FreeScheduled = 6,
+ FreeLeaving = 7,
+ FreeExecuting = 8,
+ FreeLeavingMarked = 9,
+ };
+ };
+
+ volatile ui32 ExecutionState;
+ ui32 Reserved : 4; // never changes, always zero
+ ui32 Type : 4; // never changes
+ ui32 ActorPack : 2;
+ ui32 Knobs : 22;
+
+ struct TActorPair {
+ IActor *Actor;
+ ui64 ActorId;
+ };
+
+ struct alignas(64) TActorArray {
+ TActorPair Actors[ARRAY_CAPACITY];
+ };
+
+ union TActorsInfo {
+ TActorPair Simple;
+ struct {
+ TActorArray* ActorsArray;
+ ui64 ActorsCount;
+ } Array;
+ struct {
+ TActorMap* ActorsMap;
+ } Map;
+ } ActorsInfo;
+
+ TMailboxHeader(TMailboxType::EType type);
+ ~TMailboxHeader();
+
+ bool CleanupActors();
+
+        // this interface is used exclusively by the executor thread, so the implementation lives there
+
+        bool MarkForSchedule(); // we put something in the queue; check whether we should schedule
+
+        bool LockForExecution(); // we got an activation, try to lock the mailbox
+        bool LockFromFree(); // try to claim the mailbox from the recycled pool (could fail if another thread is processing garbage)
+
+        void UnlockFromExecution1(); // prepare for releasing the lock
+        bool UnlockFromExecution2(bool wouldReschedule); // proceed with releasing the lock
+        bool UnlockAsFree(bool wouldReschedule); // proceed with releasing the lock, but mark the mailbox as free
+
+ bool IsEmpty() const noexcept {
+ return (ActorPack == TMailboxActorPack::Simple && ActorsInfo.Simple.ActorId == 0);
+ }
+
+ template<typename T>
+ void ForEach(T&& callback) noexcept {
+ switch (ActorPack) {
+ case TMailboxActorPack::Simple:
+ if (ActorsInfo.Simple.ActorId) {
+ callback(ActorsInfo.Simple.ActorId, ActorsInfo.Simple.Actor);
+ }
+ break;
+
+ case TMailboxActorPack::Map:
+ for (const auto& [actorId, actor] : *ActorsInfo.Map.ActorsMap) {
+ callback(actorId, actor);
+ }
+ break;
+
+ case TMailboxActorPack::Array:
+ for (ui64 i = 0; i < ActorsInfo.Array.ActorsCount; ++i) {
+ auto& row = ActorsInfo.Array.ActorsArray->Actors[i];
+ callback(row.ActorId, row.Actor);
+ }
+ break;
+ }
+ }
+
+ IActor* FindActor(ui64 localActorId) noexcept {
+ switch (ActorPack) {
+ case TMailboxActorPack::Simple: {
+ if (ActorsInfo.Simple.ActorId == localActorId)
+ return ActorsInfo.Simple.Actor;
+ break;
+ }
+ case TMailboxActorPack::Map: {
+ TActorMap::iterator it = ActorsInfo.Map.ActorsMap->find(localActorId);
+ if (it != ActorsInfo.Map.ActorsMap->end())
+ return it->second;
+ break;
+ }
+ case TMailboxActorPack::Array: {
+ for (ui64 i = 0; i < ActorsInfo.Array.ActorsCount; ++i) {
+ if (ActorsInfo.Array.ActorsArray->Actors[i].ActorId == localActorId) {
+ return ActorsInfo.Array.ActorsArray->Actors[i].Actor;
+ }
+ }
+ break;
+ }
+ default:
+ Y_FAIL();
+ }
+ return nullptr;
+ }
+
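+        // Storage is promoted as actors are attached: Simple (a single pair) ->
+        // Array (up to ARRAY_CAPACITY entries) -> Map; DetachActor demotes it back
+        // once the population fits the smaller representation again.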
+ void AttachActor(ui64 localActorId, IActor* actor) noexcept {
+ switch (ActorPack) {
+ case TMailboxActorPack::Simple: {
+ if (ActorsInfo.Simple.ActorId == 0) {
+ ActorsInfo.Simple.ActorId = localActorId;
+ ActorsInfo.Simple.Actor = actor;
+ return;
+ } else {
+ auto ar = new TActorArray;
+ ar->Actors[0] = ActorsInfo.Simple;
+ ar->Actors[1] = TActorPair{actor, localActorId};
+ ActorsInfo.Array.ActorsCount = 2;
+ ActorPack = TMailboxActorPack::Array;
+ ActorsInfo.Array.ActorsArray = ar;
+ }
+ break;
+ }
+ case TMailboxActorPack::Map: {
+ ActorsInfo.Map.ActorsMap->insert(TActorMap::value_type(localActorId, actor));
+ break;
+ }
+ case TMailboxActorPack::Array: {
+ if (ActorsInfo.Array.ActorsCount == ARRAY_CAPACITY) {
+ TActorMap* mp = new TActorMap();
+ for (ui64 i = 0; i < ARRAY_CAPACITY; ++i) {
+ mp->emplace(ActorsInfo.Array.ActorsArray->Actors[i].ActorId, ActorsInfo.Array.ActorsArray->Actors[i].Actor);
+ }
+ mp->emplace(localActorId, actor);
+ ActorPack = TMailboxActorPack::Map;
+ ActorsInfo.Array.ActorsCount = 0;
+ delete ActorsInfo.Array.ActorsArray;
+ ActorsInfo.Map.ActorsMap = mp;
+ } else {
+ ActorsInfo.Array.ActorsArray->Actors[ActorsInfo.Array.ActorsCount++] = TActorPair{actor, localActorId};
+ }
+ break;
+ }
+ default:
+ Y_FAIL();
+ }
+ }
+
+ IActor* DetachActor(ui64 localActorId) noexcept {
+ Y_VERIFY_DEBUG(FindActor(localActorId) != nullptr);
+
+ IActor* actorToDestruct = nullptr;
+
+ switch (ActorPack) {
+ case TMailboxActorPack::Simple: {
+ Y_VERIFY(ActorsInfo.Simple.ActorId == localActorId);
+ actorToDestruct = ActorsInfo.Simple.Actor;
+
+ ActorsInfo.Simple.ActorId = 0;
+ ActorsInfo.Simple.Actor = nullptr;
+ break;
+ }
+ case TMailboxActorPack::Map: {
+ TActorMap::iterator it = ActorsInfo.Map.ActorsMap->find(localActorId);
+ Y_VERIFY(it != ActorsInfo.Map.ActorsMap->end());
+
+ actorToDestruct = it->second;
+ ActorsInfo.Map.ActorsMap->erase(it);
+
+ if (ActorsInfo.Map.ActorsMap->size() == ARRAY_CAPACITY) {
+ auto ar = new TActorArray;
+ ui64 i = 0;
+ for (auto& [actorId, actor] : *ActorsInfo.Map.ActorsMap) {
+ ar->Actors[i++] = TActorPair{actor, actorId};
+ }
+ delete ActorsInfo.Map.ActorsMap;
+ ActorPack = TMailboxActorPack::Array;
+ ActorsInfo.Array.ActorsArray = ar;
+ ActorsInfo.Array.ActorsCount = ARRAY_CAPACITY;
+ }
+ break;
+ }
+ case TMailboxActorPack::Array: {
+ bool found = false;
+ for (ui64 i = 0; i < ActorsInfo.Array.ActorsCount; ++i) {
+ if (ActorsInfo.Array.ActorsArray->Actors[i].ActorId == localActorId) {
+ found = true;
+ actorToDestruct = ActorsInfo.Array.ActorsArray->Actors[i].Actor;
+ ActorsInfo.Array.ActorsArray->Actors[i] = ActorsInfo.Array.ActorsArray->Actors[ActorsInfo.Array.ActorsCount - 1];
+ ActorsInfo.Array.ActorsCount -= 1;
+ break;
+ }
+ }
+ Y_VERIFY(found);
+
+ if (ActorsInfo.Array.ActorsCount == 1) {
+ const TActorPair Actor = ActorsInfo.Array.ActorsArray->Actors[0];
+ delete ActorsInfo.Array.ActorsArray;
+ ActorPack = TMailboxActorPack::Simple;
+ ActorsInfo.Simple = Actor;
+ }
+ break;
+ }
+ }
+
+ return actorToDestruct;
+ }
+
+ std::pair<ui32, ui32> CountMailboxEvents(ui64 localActorId, ui32 maxTraverse);
+ };
+
+ class TMailboxTable : TNonCopyable {
+ private:
+ struct TMailboxLineHeader {
+ const TMailboxType::EType MailboxType;
+ const ui32 Index;
+            // some more stuff in the first cache line; the mailboxes follow
+ ui8 Padding[52];
+
+ TMailboxLineHeader(TMailboxType::EType type, ui32 index)
+ : MailboxType(type)
+ , Index(index)
+ {
+ }
+ };
+ static_assert(sizeof(TMailboxLineHeader) <= 64, "expect sizeof(TMailboxLineHeader) <= 64");
+
+        constexpr static ui64 MaxLines = 131000; // somewhat less than 2^17.
+ constexpr static ui64 LineSize = 262144; // 64 * 2^12.
+
+ TAtomic LastAllocatedLine;
+ TAtomic AllocatedMailboxCount;
+
+ typedef TUnorderedCache<ui32, 512, 4> TMailboxCache;
+ TMailboxCache MailboxCacheSimple;
+ TAtomic CachedSimpleMailboxes;
+ TMailboxCache MailboxCacheRevolving;
+ TAtomic CachedRevolvingMailboxes;
+ TMailboxCache MailboxCacheHTSwap;
+ TAtomic CachedHTSwapMailboxes;
+ TMailboxCache MailboxCacheReadAsFilled;
+ TAtomic CachedReadAsFilledMailboxes;
+ TMailboxCache MailboxCacheTinyReadAsFilled;
+ TAtomic CachedTinyReadAsFilledMailboxes;
+
+        // here goes a large chunk of lines,
+        // kept as a fixed-size array to avoid synchronization on access
+ TMailboxLineHeader* volatile Lines[MaxLines];
+
+ ui32 AllocateNewLine(TMailboxType::EType type);
+ ui32 TryAllocateMailbox(TMailboxType::EType type, ui64 revolvingCounter);
+
+ public:
+ TMailboxTable();
+ ~TMailboxTable();
+
+        bool Cleanup(); // returns true if nothing was found to destruct (so nothing new could have been created)
+
+ static const ui32 LineIndexShift = 12;
+ static const ui32 LineIndexMask = 0x1FFFFu << LineIndexShift;
+ static const ui32 LineHintMask = 0xFFFu;
+ static const ui32 PoolIndexShift = TActorId::PoolIndexShift;
+ static const ui32 PoolIndexMask = TActorId::PoolIndexMask;
+
+ static ui32 LineIndex(ui32 hint) {
+ return ((hint & LineIndexMask) >> LineIndexShift);
+ }
+ static ui32 PoolIndex(ui32 hint) {
+ return TActorId::PoolIndex(hint);
+ }
+
+ TMailboxHeader* Get(ui32 hint);
+ ui32 AllocateMailbox(TMailboxType::EType type, ui64 revolvingCounter);
+ void ReclaimMailbox(TMailboxType::EType type, ui32 hint, ui64 revolvingCounter);
+ ui64 GetAllocatedMailboxCount() const {
+ return RelaxedLoad(&AllocatedMailboxCount);
+ }
+
+ bool SendTo(TAutoPtr<IEventHandle>& ev, IExecutorPool* executorPool);
+
+ struct TSimpleMailbox: public TMailboxHeader {
+ // 4 bytes - state
+ // 4 bytes - knobs
+ // 8 bytes - actorid
+ // 8 bytes - actor*
+ TSimpleMailboxQueue<IEventHandle*, 64> Queue; // 24 + 8 bytes (body, lock)
+ NHPTimer::STime ScheduleMoment;
+
+ TSimpleMailbox();
+ ~TSimpleMailbox();
+
+ IEventHandle* Pop() {
+ return Queue.Pop();
+ }
+ IEventHandle* Head() {
+ return Queue.Head();
+ }
+
+ static TSimpleMailbox* Get(ui32 hint, void* line) {
+                return (TSimpleMailbox*)((ui8*)line + hint * 64);
+ }
+ static const TMailboxType::EType MailboxType = TMailboxType::Simple;
+ constexpr static ui32 AlignedSize() {
+ return ((sizeof(TSimpleMailbox) + 63) / 64) * 64;
+ }
+
+ std::pair<ui32, ui32> CountSimpleMailboxEvents(ui64 localActorId, ui32 maxTraverse);
+ bool CleanupEvents();
+ };
+
+ static_assert(sizeof(TSimpleMailbox) == 64, "expect sizeof(TSimpleMailbox) == 64");
+
+ struct TRevolvingMailbox: public TMailboxHeader {
+ // 4 bytes - state
+ // 4 bytes - knobs
+ // 8 bytes - actorid
+ // 8 bytes - actor*
+ TRevolvingMailboxQueue<IEventHandle*, 3, 128>::TReader QueueReader; // 8 * 3 + 4 * 3 + (padding): 40 bytes
+            // the next cache line starts here, so there is less writer/reader interference
+ TRevolvingMailboxQueue<IEventHandle*, 3, 128>::TWriter QueueWriter; // 8 * 3 + 4 * 3 + 8 : 48 bytes
+ ui32 Reserved1;
+ ui32 Reserved2;
+ NHPTimer::STime ScheduleMoment;
+
+ TRevolvingMailbox();
+ ~TRevolvingMailbox();
+
+ IEventHandle* Pop() {
+ return QueueReader.Pop();
+ }
+ IEventHandle* Head() {
+ return QueueReader.Head();
+ }
+
+ static TRevolvingMailbox* Get(ui32 hint, void* line) {
+ return (TRevolvingMailbox*)((ui8*)line + 64 + (hint - 1) * 128);
+ }
+
+ constexpr static ui64 MaxMailboxesInLine() {
+ return (LineSize - 64) / AlignedSize();
+ }
+ static const TMailboxType::EType MailboxType = TMailboxType::Revolving;
+ constexpr static ui32 AlignedSize() {
+ return ((sizeof(TRevolvingMailbox) + 63) / 64) * 64;
+ }
+
+ std::pair<ui32, ui32> CountRevolvingMailboxEvents(ui64 localActorId, ui32 maxTraverse);
+ bool CleanupEvents();
+ };
+
+ static_assert(sizeof(TRevolvingMailbox) == 128, "expect sizeof(TRevolvingMailbox) == 128");
+
+ struct THTSwapMailbox: public TMailboxHeader {
+ using TQueueType = NThreading::THTSwapQueue<IEventHandle*>;
+
+ TQueueType Queue;
+ NHPTimer::STime ScheduleMoment;
+ char Padding_[16];
+
+ THTSwapMailbox()
+ : TMailboxHeader(TMailboxType::HTSwap)
+ , ScheduleMoment(0)
+ {
+ }
+
+ ~THTSwapMailbox() {
+ CleanupEvents();
+ }
+
+ IEventHandle* Pop() {
+ return Queue.Pop();
+ }
+
+ IEventHandle* Head() {
+ return Queue.Peek();
+ }
+
+ static THTSwapMailbox* Get(ui32 hint, void* line) {
+ return (THTSwapMailbox*)((ui8*)line + 64 + (hint - 1) * 64);
+ }
+
+ constexpr static ui64 MaxMailboxesInLine() {
+ return (LineSize - 64) / AlignedSize();
+ }
+
+ static const TMailboxType::EType MailboxType = TMailboxType::HTSwap;
+
+ constexpr static ui32 AlignedSize() {
+ return ((sizeof(THTSwapMailbox) + 63) / 64) * 64;
+ }
+
+ bool CleanupEvents() {
+ const bool done = (Queue.Peek() == nullptr);
+ while (IEventHandle* ev = Queue.Pop())
+ delete ev;
+ return done;
+ }
+ };
+
+ static_assert(sizeof(THTSwapMailbox) == 64,
+ "expect sizeof(THTSwapMailbox) == 64");
+
+ struct TReadAsFilledMailbox: public TMailboxHeader {
+ using TQueueType = NThreading::TReadAsFilledQueue<IEventHandle>;
+
+ TQueueType Queue;
+ NHPTimer::STime ScheduleMoment;
+ char Padding_[8];
+
+ TReadAsFilledMailbox()
+ : TMailboxHeader(TMailboxType::ReadAsFilled)
+ , ScheduleMoment(0)
+ {
+ }
+
+ ~TReadAsFilledMailbox() {
+ CleanupEvents();
+ }
+
+ IEventHandle* Pop() {
+ return Queue.Pop();
+ }
+
+ IEventHandle* Head() {
+ return Queue.Peek();
+ }
+
+ static TReadAsFilledMailbox* Get(ui32 hint, void* line) {
+ return (TReadAsFilledMailbox*)((ui8*)line + 64 + (hint - 1) * 192);
+ }
+
+ constexpr static ui64 MaxMailboxesInLine() {
+ return (LineSize - 64) / AlignedSize();
+ }
+
+ static const TMailboxType::EType MailboxType =
+ TMailboxType::ReadAsFilled;
+
+ constexpr static ui32 AlignedSize() {
+ return ((sizeof(TReadAsFilledMailbox) + 63) / 64) * 64;
+ }
+
+ bool CleanupEvents() {
+ const bool done = (Queue.Peek() == nullptr);
+ while (IEventHandle* ev = Queue.Pop())
+ delete ev;
+ return done;
+ }
+ };
+
+ static_assert(sizeof(TReadAsFilledMailbox) == 192,
+ "expect sizeof(TReadAsFilledMailbox) == 192");
+
+ struct TTinyReadAsFilledMailbox: public TMailboxHeader {
+ using TQueueType = NThreading::TReadAsFilledQueue<
+ IEventHandle,
+ NThreading::TRaFQueueBunchSize<4>>;
+
+ TQueueType Queue;
+ NHPTimer::STime ScheduleMoment;
+ char Padding_[8];
+
+ TTinyReadAsFilledMailbox()
+ : TMailboxHeader(TMailboxType::TinyReadAsFilled)
+ , ScheduleMoment(0)
+ {
+ }
+
+ ~TTinyReadAsFilledMailbox() {
+ CleanupEvents();
+ }
+
+ IEventHandle* Pop() {
+ return Queue.Pop();
+ }
+
+ IEventHandle* Head() {
+ return Queue.Peek();
+ }
+
+ static TTinyReadAsFilledMailbox* Get(ui32 hint, void* line) {
+ return (TTinyReadAsFilledMailbox*)((ui8*)line + 64 + (hint - 1) * 192);
+ }
+
+ constexpr static ui64 MaxMailboxesInLine() {
+ return (LineSize - 64) / AlignedSize();
+ }
+
+ static const TMailboxType::EType MailboxType =
+ TMailboxType::TinyReadAsFilled;
+
+ constexpr static ui32 AlignedSize() {
+ return ((sizeof(TTinyReadAsFilledMailbox) + 63) / 64) * 64;
+ }
+
+ bool CleanupEvents() {
+ const bool done = (Queue.Peek() == nullptr);
+ while (IEventHandle* ev = Queue.Pop())
+ delete ev;
+ return done;
+ }
+ };
+
+ static_assert(sizeof(TTinyReadAsFilledMailbox) == 192,
+ "expect sizeof(TTinyReadAsFilledMailbox) == 192");
+ };
+}
diff --git a/library/cpp/actors/core/mailbox_queue_revolving.h b/library/cpp/actors/core/mailbox_queue_revolving.h
new file mode 100644
index 0000000000..b0e78a18db
--- /dev/null
+++ b/library/cpp/actors/core/mailbox_queue_revolving.h
@@ -0,0 +1,214 @@
+#pragma once
+
+#include "defs.h"
+#include <library/cpp/actors/util/queue_chunk.h>
+
+namespace NActors {
+    // Adds some concurrency to the basic queue to avoid hangs under contention (we pay with memory, so use it only when contention is really expected).
+    // Ordering: every completed push is guaranteed to be seen before any not-yet-initiated push; parallel pushes may reorder, which is natural for concurrent queues.
+    // The reader and the writers are placed on different cache lines to avoid contention between them.
+    // If strict ordering does not matter, look at TManyOneQueue.
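+    //
+    // usage sketch (illustrative, mirroring how TRevolvingMailbox wires it up):
+    //   TRevolvingMailboxQueue<IEventHandle*>::TReader reader;
+    //   TRevolvingMailboxQueue<IEventHandle*>::TWriter writer(reader); // writer starts from the reader's chunks
+    //   writer.Push(ev);                   // multiple threads may push concurrently
+    //   IEventHandle* head = reader.Pop(); // single consumer only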
+
+ template <typename T, ui32 TWriteConcurrency = 3, ui32 TSize = 128>
+ class TRevolvingMailboxQueue {
+ static_assert(std::is_integral<T>::value || std::is_pointer<T>::value, "expect std::is_integral<T>::value || std::is_pointer<T>::value");
+
+ struct TValTagPair {
+ volatile T Value;
+ volatile ui64 Tag;
+ };
+
+ typedef TQueueChunk<TValTagPair, TSize> TChunk;
+
+ static_assert(sizeof(TAtomic) == sizeof(TChunk*), "expect sizeof(TAtomic) == sizeof(TChunk*)");
+ static_assert(sizeof(TAtomic) == sizeof(ui64), "expect sizeof(TAtomic) == sizeof(ui64)");
+
+ public:
+ class TWriter;
+
+ class TReader {
+ TChunk* ReadFrom[TWriteConcurrency];
+ ui32 ReadPosition[TWriteConcurrency];
+
+ friend class TRevolvingMailboxQueue<T, TWriteConcurrency, TSize>::TWriter; // for access to ReadFrom in constructor
+
+ bool ChunkHead(ui32 idx, ui64* tag, T* value) {
+ TChunk* head = ReadFrom[idx];
+ const ui32 pos = ReadPosition[idx];
+ if (pos != TChunk::EntriesCount) {
+ if (const T xval = AtomicLoad(&head->Entries[pos].Value)) {
+ const ui64 xtag = head->Entries[pos].Tag;
+ if (xtag < *tag) {
+ *value = xval;
+ *tag = xtag;
+ return true;
+ }
+ }
+ } else if (TChunk* next = AtomicLoad(&head->Next)) {
+ ReadFrom[idx] = next;
+ delete head;
+ ReadPosition[idx] = 0;
+ return ChunkHead(idx, tag, value);
+ }
+
+ return false;
+ }
+
+ T Head(bool pop) {
+ ui64 tag = Max<ui64>();
+ T ret = T{};
+ ui32 idx = 0;
+
+ for (ui32 i = 0; i < TWriteConcurrency; ++i)
+ if (ChunkHead(i, &tag, &ret))
+ idx = i;
+
+                // without a second pass we could observe updates reordered against the already-scanned range
+ if (ret) {
+ for (ui32 i = 0; i < TWriteConcurrency; ++i)
+ if (ChunkHead(i, &tag, &ret))
+ idx = i;
+ }
+
+ if (pop && ret)
+ ++ReadPosition[idx];
+
+ return ret;
+ }
+
+ public:
+ TReader() {
+ for (ui32 i = 0; i != TWriteConcurrency; ++i) {
+ ReadFrom[i] = new TChunk();
+ ReadPosition[i] = 0;
+ }
+ }
+
+ ~TReader() {
+ Y_VERIFY_DEBUG(Head() == 0);
+ for (ui32 i = 0; i < TWriteConcurrency; ++i)
+ delete ReadFrom[i];
+ }
+
+ T Pop() {
+ return Head(true);
+ }
+
+ T Head() {
+ return Head(false);
+ }
+
+ class TReadIterator {
+ TChunk* ReadFrom[TWriteConcurrency];
+ ui32 ReadPosition[TWriteConcurrency];
+
+ bool ChunkHead(ui32 idx, ui64* tag, T* value) {
+ TChunk* head = ReadFrom[idx];
+ const ui32 pos = ReadPosition[idx];
+ if (pos != TChunk::EntriesCount) {
+ if (const T xval = AtomicLoad(&head->Entries[pos].Value)) {
+ const ui64 xtag = head->Entries[pos].Tag;
+ if (xtag < *tag) {
+ *value = xval;
+ *tag = xtag;
+ return true;
+ }
+ }
+ } else if (TChunk* next = AtomicLoad(&head->Next)) {
+ ReadFrom[idx] = next;
+ ReadPosition[idx] = 0;
+ return ChunkHead(idx, tag, value);
+ }
+
+ return false;
+ }
+
+ public:
+ TReadIterator(TChunk* const* readFrom, const ui32* readPosition) {
+ memcpy(ReadFrom, readFrom, TWriteConcurrency * sizeof(TChunk*));
+ memcpy(ReadPosition, readPosition, TWriteConcurrency * sizeof(ui32));
+ }
+
+ T Next() {
+ ui64 tag = Max<ui64>();
+ T ret = T{};
+ ui32 idx = 0;
+
+ for (ui32 i = 0; i < TWriteConcurrency; ++i)
+ if (ChunkHead(i, &tag, &ret))
+ idx = i;
+
+                    // without a second pass we could observe updates reordered against the already-scanned range
+ if (ret) {
+ for (ui32 i = 0; i < TWriteConcurrency; ++i)
+ if (ChunkHead(i, &tag, &ret))
+ idx = i;
+ }
+
+ if (ret)
+ ++ReadPosition[idx];
+
+ return ret;
+ }
+ };
+
+ TReadIterator Iterator() const {
+ return TReadIterator(ReadFrom, ReadPosition);
+ }
+ };
+
+ class TWriter {
+ TChunk* volatile WriteTo[TWriteConcurrency];
+ volatile ui64 Tag;
+ ui32 WritePosition[TWriteConcurrency];
+
+ public:
+ TWriter(const TReader& reader)
+ : Tag(0)
+ {
+ for (ui32 i = 0; i != TWriteConcurrency; ++i) {
+ WriteTo[i] = reader.ReadFrom[i];
+ WritePosition[i] = 0;
+ }
+ }
+
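+            // A writer claims one of the TWriteConcurrency slots by atomically
+            // swapping its chunk pointer to nullptr; competing writers just move on
+            // to the next slot, so parallel pushes rarely touch the same cache line.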
+ bool TryPush(T x) {
+ Y_VERIFY(x != 0);
+
+ for (ui32 i = 0; i != TWriteConcurrency; ++i) {
+ if (RelaxedLoad(&WriteTo[i]) != nullptr) {
+ if (TChunk* writeTo = AtomicSwap(&WriteTo[i], nullptr)) {
+ const ui64 nextTag = AtomicIncrement(Tag);
+ Y_VERIFY_DEBUG(nextTag < Max<ui64>());
+ const ui32 writePosition = WritePosition[i];
+ if (writePosition != TChunk::EntriesCount) {
+ writeTo->Entries[writePosition].Tag = nextTag;
+ AtomicStore(&writeTo->Entries[writePosition].Value, x);
+ ++WritePosition[i];
+ } else {
+ TChunk* next = new TChunk();
+ next->Entries[0].Tag = nextTag;
+ next->Entries[0].Value = x;
+ AtomicStore(&writeTo->Next, next);
+ writeTo = next;
+ WritePosition[i] = 1;
+ }
+ AtomicStore(WriteTo + i, writeTo);
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+
+ ui32 Push(T x) {
+ ui32 spins = 0;
+ while (!TryPush(x)) {
+ ++spins;
+ SpinLockPause();
+ }
+ return spins;
+ }
+ };
+ };
+}
diff --git a/library/cpp/actors/core/mailbox_queue_simple.h b/library/cpp/actors/core/mailbox_queue_simple.h
new file mode 100644
index 0000000000..2e44c21adb
--- /dev/null
+++ b/library/cpp/actors/core/mailbox_queue_simple.h
@@ -0,0 +1,34 @@
+#pragma once
+
+#include "defs.h"
+#include <library/cpp/actors/util/ticket_lock.h>
+#include <library/cpp/actors/util/queue_oneone_inplace.h>
+
+namespace NActors {
+    // dead-simple one-one queue, based on the serializability guarantees of x64 and a ticket lock to ensure writer exclusivity.
+ template <typename T, ui32 TSize>
+ class TSimpleMailboxQueue {
+ TOneOneQueueInplace<T, TSize> Queue;
+ TTicketLock Lock;
+
+ public:
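+        // writers are serialized by the ticket lock; the returned value is the
+        // number of spin iterations spent acquiring it (a cheap contention signal)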
+ ui32 Push(T x) noexcept {
+ const ui32 spins = Lock.Acquire();
+ Queue.Push(x);
+ Lock.Release();
+ return spins;
+ }
+
+ T Head() {
+ return Queue.Head();
+ }
+
+ T Pop() {
+ return Queue.Pop();
+ }
+
+ typename TOneOneQueueInplace<T, TSize>::TReadIterator ReadIterator() {
+ return Queue.Iterator();
+ }
+ };
+}
diff --git a/library/cpp/actors/core/memory_track.cpp b/library/cpp/actors/core/memory_track.cpp
new file mode 100644
index 0000000000..5f422116be
--- /dev/null
+++ b/library/cpp/actors/core/memory_track.cpp
@@ -0,0 +1,38 @@
+#include "memory_track.h"
+#include "memory_tracker.h"
+
+namespace NActors {
+namespace NMemory {
+
+namespace NPrivate {
+
+TThreadLocalInfo::TThreadLocalInfo()
+ : Metrics(TMemoryTracker::Instance()->GetCount())
+{
+ TMemoryTracker::Instance()->OnCreateThread(this);
+}
+
+TThreadLocalInfo::~TThreadLocalInfo() {
+ TMemoryTracker::Instance()->OnDestroyThread(this);
+}
+
+TMetric* TThreadLocalInfo::GetMetric(size_t index) {
+ if (Y_UNLIKELY(index >= Metrics.size())) {
+ return &Null;
+ }
+ return &Metrics[index];
+}
+
+const std::vector<TMetric>& TThreadLocalInfo::GetMetrics() const {
+ return Metrics;
+}
+
+size_t TBaseLabel::RegisterStaticMemoryLabel(const char* name, bool hasSensor) {
+ return TMemoryTracker::Instance()->RegisterStaticMemoryLabel(name, hasSensor);
+}
+
+}
+
+}
+}
+
diff --git a/library/cpp/actors/core/memory_track.h b/library/cpp/actors/core/memory_track.h
new file mode 100644
index 0000000000..6035333eeb
--- /dev/null
+++ b/library/cpp/actors/core/memory_track.h
@@ -0,0 +1,293 @@
+#pragma once
+
+#include <vector>
+
+#include <util/system/type_name.h>
+#include <util/thread/singleton.h>
+
+#define ENABLE_MEMORY_TRACKING
+
+namespace NActors {
+namespace NMemory {
+
+namespace NPrivate {
+
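+// Counters are relaxed atomics: updates are cheap and tear-free, but cross-thread
+// reads (and the copy operations below) only give an approximate snapshot.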
+class TMetric {
+ std::atomic<ssize_t> Memory;
+ std::atomic<ssize_t> Count;
+
+ void Copy(const TMetric& other) {
+ Memory.store(other.GetMemory(), std::memory_order_relaxed);
+ Count.store(other.GetCount(), std::memory_order_relaxed);
+ }
+
+public:
+ TMetric()
+ : Memory(0)
+ , Count(0)
+ {}
+
+ inline TMetric(const TMetric& other) {
+ Copy(other);
+ }
+
+ inline TMetric(TMetric&& other) {
+ Copy(other);
+ }
+
+ inline TMetric& operator=(const TMetric& other) {
+ Copy(other);
+ return *this;
+ }
+
+ inline TMetric& operator=(TMetric&& other) {
+ Copy(other);
+ return *this;
+ }
+
+ inline ssize_t GetMemory() const {
+ return Memory.load(std::memory_order_relaxed);
+ }
+ inline void SetMemory(ssize_t value) {
+ Memory.store(value, std::memory_order_relaxed);
+ }
+
+ inline ssize_t GetCount() const {
+ return Count.load(std::memory_order_relaxed);
+ }
+ inline void SetCount(ssize_t value) {
+ Count.store(value, std::memory_order_relaxed);
+ }
+
+ inline void operator+=(const TMetric& other) {
+ SetMemory(GetMemory() + other.GetMemory());
+ SetCount(GetCount() + other.GetCount());
+ }
+
+ inline void CalculatePeak(const TMetric& other) {
+ SetMemory(Max(GetMemory(), other.GetMemory()));
+ SetCount(Max(GetCount(), other.GetCount()));
+ }
+
+ inline void Add(size_t size) {
+ SetMemory(GetMemory() + size);
+ SetCount(GetCount() + 1);
+ }
+
+ inline void Sub(size_t size) {
+ SetMemory(GetMemory() - size);
+ SetCount(GetCount() - 1);
+ }
+};
+
+
+class TThreadLocalInfo {
+public:
+ TThreadLocalInfo();
+ ~TThreadLocalInfo();
+
+ TMetric* GetMetric(size_t index);
+ const std::vector<TMetric>& GetMetrics() const;
+
+private:
+ std::vector<TMetric> Metrics;
+
+ inline static TMetric Null = {};
+};
+
+
+class TBaseLabel {
+protected:
+ static size_t RegisterStaticMemoryLabel(const char* name, bool hasSensor);
+
+ inline static TMetric* GetLocalMetric(size_t index) {
+ return FastTlsSingleton<TThreadLocalInfo>()->GetMetric(index);
+ }
+};
+
+
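+// Counts allocations under a compile-time label: a static TMetricInit registers
+// the label once at program start, and each thread lazily caches a pointer to its
+// thread-local TMetric, so the hot path is one branch plus two relaxed updates.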
+template <const char* Name>
+class TNameLabel
+ : TBaseLabel
+{
+public:
+ static void Add(size_t size) {
+#if defined(ENABLE_MEMORY_TRACKING)
+ Y_UNUSED(MetricInit);
+
+ if (Y_UNLIKELY(!Metric)) {
+ Metric = GetLocalMetric(Index);
+ }
+
+ Metric->Add(size);
+#else
+ Y_UNUSED(size);
+#endif
+ }
+
+ static void Sub(size_t size) {
+#if defined(ENABLE_MEMORY_TRACKING)
+ Y_UNUSED(MetricInit);
+
+ if (Y_UNLIKELY(!Metric)) {
+ Metric = GetLocalMetric(Index);
+ }
+
+ Metric->Sub(size);
+#else
+ Y_UNUSED(size);
+#endif
+ }
+
+private:
+#if defined(ENABLE_MEMORY_TRACKING)
+ inline static size_t Index = Max<size_t>();
+ inline static struct TMetricInit {
+ TMetricInit() {
+ Index = RegisterStaticMemoryLabel(Name, true);
+ }
+ } MetricInit;
+
+ inline static thread_local TMetric* Metric = nullptr;
+#endif
+};
+
+
+template <typename TType>
+class TTypeLabel
+ : TBaseLabel
+{
+public:
+ static void Add(size_t size) {
+#if defined(ENABLE_MEMORY_TRACKING)
+ Y_UNUSED(MetricInit);
+
+ if (Y_UNLIKELY(!Metric)) {
+ Metric = GetLocalMetric(Index);
+ }
+
+ Metric->Add(size);
+#else
+ Y_UNUSED(size);
+#endif
+ }
+
+ static void Sub(size_t size) {
+#if defined(ENABLE_MEMORY_TRACKING)
+ Y_UNUSED(MetricInit);
+
+ if (Y_UNLIKELY(!Metric)) {
+ Metric = GetLocalMetric(Index);
+ }
+
+ Metric->Sub(size);
+#else
+ Y_UNUSED(size);
+#endif
+ }
+
+private:
+#if defined(ENABLE_MEMORY_TRACKING)
+ inline static size_t Index = Max<size_t>();
+ inline static struct TMetricInit {
+ TMetricInit() {
+ Index = RegisterStaticMemoryLabel(TypeName<TType>().c_str(), false);
+ }
+ } MetricInit;
+
+ inline static thread_local TMetric* Metric = nullptr;
+#endif
+};
+
+
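+// Mixin that routes a type's operator new/delete through label T; it relies on
+// sized deallocation so Sub() is called with the same size Add() was given.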
+template <typename T>
+struct TTrackHelper {
+#if defined(ENABLE_MEMORY_TRACKING)
+ void* operator new(size_t size) {
+ T::Add(size);
+ return malloc(size);
+ }
+
+ void* operator new[](size_t size) {
+ T::Add(size);
+ return malloc(size);
+ }
+
+ void operator delete(void* ptr, size_t size) {
+ T::Sub(size);
+ free(ptr);
+ }
+
+ void operator delete[](void* ptr, size_t size) {
+ T::Sub(size);
+ free(ptr);
+ }
+#endif
+};
+
+template <typename TType, typename T>
+struct TAllocHelper {
+ typedef size_t size_type;
+ typedef TType value_type;
+ typedef TType* pointer;
+ typedef const TType* const_pointer;
+
+ struct propagate_on_container_copy_assignment : public std::false_type {};
+ struct propagate_on_container_move_assignment : public std::false_type {};
+ struct propagate_on_container_swap : public std::false_type {};
+
+ pointer allocate(size_type n, const void* hint = nullptr) {
+ Y_UNUSED(hint);
+ auto size = n * sizeof(TType);
+ T::Add(size);
+ return (pointer)malloc(size);
+ }
+
+ void deallocate(pointer ptr, size_t n) {
+ auto size = n * sizeof(TType);
+ T::Sub(size);
+ free((void*)ptr);
+ }
+};
+
+} // NPrivate
+
+
+template <const char* Name>
+using TLabel = NPrivate::TNameLabel<Name>;
+
+template <typename TType, const char* Name = nullptr>
+struct TTrack
+ : public NPrivate::TTrackHelper<NPrivate::TNameLabel<Name>>
+{
+};
+
+template <typename TType>
+struct TTrack<TType, nullptr>
+ : public NPrivate::TTrackHelper<NPrivate::TTypeLabel<TType>>
+{
+};
+
+template <typename TType, const char* Name = nullptr>
+struct TAlloc
+ : public NPrivate::TAllocHelper<TType, NPrivate::TNameLabel<Name>>
+{
+ template<typename U>
+ struct rebind {
+ typedef TAlloc<U, Name> other;
+ };
+};
+
+template <typename TType>
+struct TAlloc<TType, nullptr>
+ : public NPrivate::TAllocHelper<TType, NPrivate::TTypeLabel<TType>>
+{
+ template<typename U>
+ struct rebind {
+ typedef TAlloc<U> other;
+ };
+};
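+// usage sketch (illustrative; memory_tracker_ut.cpp has the full version):
+//   struct TMyTracked : public NActors::NMemory::TTrack<TMyTracked> {}; // label = type name
+//   std::vector<int, NActors::NMemory::TAlloc<int>> v;                  // tracks the container's memory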
+
+}
+}
+
diff --git a/library/cpp/actors/core/memory_tracker.cpp b/library/cpp/actors/core/memory_tracker.cpp
new file mode 100644
index 0000000000..8a12452c71
--- /dev/null
+++ b/library/cpp/actors/core/memory_tracker.cpp
@@ -0,0 +1,103 @@
+#include "memory_tracker.h"
+
+#include <util/generic/xrange.h>
+
+namespace NActors {
+namespace NMemory {
+
+namespace NPrivate {
+
+TMemoryTracker* TMemoryTracker::Instance() {
+ return SingletonWithPriority<TMemoryTracker, 0>();
+}
+
+void TMemoryTracker::Initialize() {
+ GlobalMetrics.resize(Indices.size());
+}
+
+const std::map<TString, size_t>& TMemoryTracker::GetMetricIndices() const {
+ return Indices;
+}
+
+const std::unordered_set<size_t>& TMemoryTracker::GetSensors() const {
+ return Sensors;
+}
+
+TString TMemoryTracker::GetName(size_t index) const {
+ return Names[index];
+}
+
+size_t TMemoryTracker::GetCount() const {
+ return Indices.size();
+}
+
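+// Sums GlobalMetrics (counters folded in from already-destroyed threads) with the
+// live per-thread counters; the result is a relaxed snapshot, not an exact one.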
+void TMemoryTracker::GatherMetrics(std::vector<TMetric>& metrics) const {
+ metrics.resize(0);
+ auto count = GetCount();
+
+ if (!count || GlobalMetrics.size() != count) {
+ return;
+ }
+
+ TReadGuard guard(LockThreadInfo);
+
+ metrics.resize(count);
+ for (size_t i : xrange(count)) {
+ metrics[i] += GlobalMetrics[i];
+ }
+
+ for (auto info : ThreadInfo) {
+ auto& localMetrics = info->GetMetrics();
+ if (localMetrics.size() == count) {
+ for (size_t i : xrange(count)) {
+ metrics[i] += localMetrics[i];
+ }
+ }
+ }
+}
+
+size_t TMemoryTracker::RegisterStaticMemoryLabel(const char* name, bool hasSensor) {
+ size_t index = 0;
+ auto found = Indices.find(name);
+ if (found == Indices.end()) {
+ TString str(name);
+ auto next = Names.size();
+ Indices.emplace(str, next);
+ Names.push_back(str);
+ index = next;
+ } else {
+ index = found->second;
+ }
+
+ if (hasSensor) {
+ Sensors.emplace(index);
+ }
+ return index;
+}
+
+void TMemoryTracker::OnCreateThread(TThreadLocalInfo* info) {
+ TWriteGuard guard(LockThreadInfo);
+ ThreadInfo.insert(info);
+}
+
+void TMemoryTracker::OnDestroyThread(TThreadLocalInfo* info) {
+ TWriteGuard guard(LockThreadInfo);
+
+ auto count = GetCount();
+ if (count && GlobalMetrics.size() == count) {
+ const auto& localMetrics = info->GetMetrics();
+ if (localMetrics.size() == count) {
+ for (size_t i : xrange(count)) {
+ GlobalMetrics[i] += localMetrics[i];
+ }
+ }
+ }
+
+ ThreadInfo.erase(info);
+}
+
+}
+
+}
+}
+
diff --git a/library/cpp/actors/core/memory_tracker.h b/library/cpp/actors/core/memory_tracker.h
new file mode 100644
index 0000000000..e74508191b
--- /dev/null
+++ b/library/cpp/actors/core/memory_tracker.h
@@ -0,0 +1,53 @@
+#pragma once
+
+#include "memory_track.h"
+
+#include <map>
+#include <unordered_map>
+#include <unordered_set>
+
+#include <util/system/rwlock.h>
+
+namespace NActors {
+namespace NMemory {
+
+namespace NPrivate {
+
+class TMemoryTracker {
+public:
+ static TMemoryTracker* Instance();
+
+ void Initialize();
+
+ const std::map<TString, size_t>& GetMetricIndices() const;
+ const std::unordered_set<size_t>& GetSensors() const;
+ TString GetName(size_t index) const;
+ size_t GetCount() const;
+
+ void GatherMetrics(std::vector<TMetric>& metrics) const;
+
+private:
+ size_t RegisterStaticMemoryLabel(const char* name, bool hasSensor);
+
+ void OnCreateThread(TThreadLocalInfo* info);
+ void OnDestroyThread(TThreadLocalInfo* info);
+
+private:
+ std::map<TString, size_t> Indices;
+ std::vector<TString> Names;
+
+ std::vector<TMetric> GlobalMetrics;
+
+ std::unordered_set<size_t> Sensors;
+
+ std::unordered_set<TThreadLocalInfo*> ThreadInfo;
+ TRWMutex LockThreadInfo;
+
+ friend class TThreadLocalInfo;
+ friend class TBaseLabel;
+};
+
+}
+
+}
+}
diff --git a/library/cpp/actors/core/memory_tracker_ut.cpp b/library/cpp/actors/core/memory_tracker_ut.cpp
new file mode 100644
index 0000000000..d168214da6
--- /dev/null
+++ b/library/cpp/actors/core/memory_tracker_ut.cpp
@@ -0,0 +1,262 @@
+#include "memory_tracker.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <util/system/hp_timer.h>
+#include <util/system/thread.h>
+
+namespace NActors {
+namespace NMemory {
+
+Y_UNIT_TEST_SUITE(TMemoryTrackerTest) {
+
+#if defined(ENABLE_MEMORY_TRACKING)
+
+using namespace NPrivate;
+
+size_t FindLabelIndex(const char* label) {
+ auto indices = TMemoryTracker::Instance()->GetMetricIndices();
+ auto it = indices.find(label);
+ UNIT_ASSERT(it != indices.end());
+ return it->second;
+}
+
+
+struct TTypeLabeled
+ : public NActors::NMemory::TTrack<TTypeLabeled>
+{
+ char payload[16];
+};
+
+static constexpr char NamedLabel[] = "NamedLabel";
+
+struct TNameLabeled
+ : public NActors::NMemory::TTrack<TNameLabeled, NamedLabel>
+{
+ char payload[32];
+};
+
+Y_UNIT_TEST(Gathering)
+{
+ TMemoryTracker::Instance()->Initialize();
+
+ auto* typed = new TTypeLabeled;
+ auto* typedArray = new TTypeLabeled[3];
+
+ auto* named = new TNameLabeled;
+ auto* namedArray = new TNameLabeled[5];
+ NActors::NMemory::TLabel<NamedLabel>::Add(100);
+
+ std::vector<TMetric> metrics;
+ TMemoryTracker::Instance()->GatherMetrics(metrics);
+
+ auto typeIndex = FindLabelIndex(TypeName<TTypeLabeled>().c_str());
+ UNIT_ASSERT(typeIndex < metrics.size());
+ UNIT_ASSERT(metrics[typeIndex].GetMemory() == sizeof(TTypeLabeled) * 4 + sizeof(size_t));
+ UNIT_ASSERT(metrics[typeIndex].GetCount() == 2);
+
+ auto nameIndex = FindLabelIndex(NamedLabel);
+ UNIT_ASSERT(nameIndex < metrics.size());
+ UNIT_ASSERT(metrics[nameIndex].GetMemory() == sizeof(TNameLabeled) * 6 + sizeof(size_t) + 100);
+ UNIT_ASSERT(metrics[nameIndex].GetCount() == 3);
+
+ NActors::NMemory::TLabel<NamedLabel>::Sub(100);
+ delete [] namedArray;
+ delete named;
+
+ delete [] typedArray;
+ delete typed;
+
+ TMemoryTracker::Instance()->GatherMetrics(metrics);
+
+ UNIT_ASSERT(metrics[typeIndex].GetMemory() == 0);
+ UNIT_ASSERT(metrics[typeIndex].GetCount() == 0);
+
+ UNIT_ASSERT(metrics[nameIndex].GetMemory() == 0);
+ UNIT_ASSERT(metrics[nameIndex].GetCount() == 0);
+}
+
+
+static constexpr char InContainerLabel[] = "InContainerLabel";
+
+struct TInContainer {
+ char payload[16];
+};
+
+Y_UNIT_TEST(Containers) {
+ TMemoryTracker::Instance()->Initialize();
+
+ std::vector<TInContainer, NActors::NMemory::TAlloc<TInContainer>> vecT;
+ vecT.resize(5);
+
+ std::vector<TInContainer, NActors::NMemory::TAlloc<TInContainer, InContainerLabel>> vecN;
+ vecN.resize(7);
+
+ using TKey = int;
+
+ std::map<TKey, TInContainer, std::less<TKey>,
+ NActors::NMemory::TAlloc<std::pair<const TKey, TInContainer>>> mapT;
+ mapT.emplace(0, TInContainer());
+ mapT.emplace(1, TInContainer());
+
+ std::map<TKey, TInContainer, std::less<TKey>,
+ NActors::NMemory::TAlloc<std::pair<const TKey, TInContainer>, InContainerLabel>> mapN;
+ mapN.emplace(0, TInContainer());
+
+ std::unordered_map<TKey, TInContainer, std::hash<TKey>, std::equal_to<TKey>,
+ NActors::NMemory::TAlloc<std::pair<const TKey, TInContainer>>> umapT;
+ umapT.emplace(0, TInContainer());
+
+ std::unordered_map<TKey, TInContainer, std::hash<TKey>, std::equal_to<TKey>,
+ NActors::NMemory::TAlloc<std::pair<const TKey, TInContainer>, InContainerLabel>> umapN;
+ umapN.emplace(0, TInContainer());
+ umapN.emplace(1, TInContainer());
+
+ std::vector<TMetric> metrics;
+ TMemoryTracker::Instance()->GatherMetrics(metrics);
+
+ auto indices = TMemoryTracker::Instance()->GetMetricIndices();
+ for (auto& [name, index] : indices) {
+ Cerr << "---- " << name
+ << ": memory = " << metrics[index].GetMemory()
+ << ", count = " << metrics[index].GetCount() << Endl;
+ }
+
+ auto vecTIndex = FindLabelIndex(TypeName<TInContainer>().c_str());
+ UNIT_ASSERT(metrics[vecTIndex].GetMemory() >= ssize_t(sizeof(TInContainer) * 5));
+ UNIT_ASSERT(metrics[vecTIndex].GetCount() == 1);
+
+ auto labelIndex = FindLabelIndex(InContainerLabel);
+ UNIT_ASSERT(metrics[labelIndex].GetCount() == 5);
+ UNIT_ASSERT(metrics[labelIndex].GetMemory() >= ssize_t(
+ sizeof(TInContainer) * 7 +
+ sizeof(decltype(mapN)::value_type) +
+ sizeof(decltype(umapN)::value_type) * 2));
+}
+
+
+static constexpr char InThreadLabel[] = "InThreadLabel";
+
+struct TInThread
+ : public NActors::NMemory::TTrack<TInThread, InThreadLabel>
+{
+ char payload[16];
+};
+
+void* ThreadProc(void*) {
+ return new TInThread;
+}
+
+Y_UNIT_TEST(Threads) {
+ TMemoryTracker::Instance()->Initialize();
+
+ auto index = FindLabelIndex(InThreadLabel);
+
+ auto* object1 = new TInThread;
+
+ std::vector<TMetric> metrics;
+ TMemoryTracker::Instance()->GatherMetrics(metrics);
+ UNIT_ASSERT(metrics[index].GetMemory() == sizeof(TInThread));
+ UNIT_ASSERT(metrics[index].GetCount() == 1);
+
+ TThread thread(&ThreadProc, nullptr);
+ thread.Start();
+ auto* object2 = static_cast<TInThread*>(thread.Join());
+
+ TMemoryTracker::Instance()->GatherMetrics(metrics);
+ UNIT_ASSERT(metrics[index].GetMemory() == sizeof(TInThread) * 2);
+ UNIT_ASSERT(metrics[index].GetCount() == 2);
+
+ delete object2;
+
+ TMemoryTracker::Instance()->GatherMetrics(metrics);
+ UNIT_ASSERT(metrics[index].GetMemory() == sizeof(TInThread));
+ UNIT_ASSERT(metrics[index].GetCount() == 1);
+
+ delete object1;
+}
+
+
+struct TNotTracked {
+ char payload[16];
+};
+
+struct TTracked
+ : public NActors::NMemory::TTrack<TTracked>
+{
+ char payload[16];
+};
+
+template <typename T>
+double MeasureAllocations() {
+ constexpr size_t objectsCount = 4 << 20;
+
+ std::vector<T*> objects;
+ objects.resize(objectsCount);
+
+ THPTimer timer;
+
+ for (size_t i = 0; i < objectsCount; ++i) {
+ objects[i] = new T;
+ }
+
+ for (size_t i = 0; i < objectsCount; ++i) {
+ delete objects[i];
+ }
+
+ auto seconds = timer.Passed();
+ Cerr << "---- objects: " << objectsCount << ", time: " << seconds << Endl;
+ return seconds;
+}
+
+Y_UNIT_TEST(Performance) {
+ TMemoryTracker::Instance()->Initialize();
+
+ constexpr size_t Runs = 16;
+
+ Cerr << "---- warmup" << Endl;
+ MeasureAllocations<TNotTracked>();
+ MeasureAllocations<TTracked>();
+
+ std::vector<double> noTrack;
+ std::vector<double> track;
+
+ for (size_t run = 0; run < Runs; ++run) {
+ Cerr << "---- no track" << Endl;
+ auto time = MeasureAllocations<TNotTracked>();
+ noTrack.push_back(time);
+
+ Cerr << "---- track" << Endl;
+ time = MeasureAllocations<TTracked>();
+ track.push_back(time);
+ }
+
+ double meanNoTrack = 0, stddevNoTrack = 0;
+ double meanTrack = 0, stddevTrack = 0;
+ for (size_t i = 0; i < Runs; ++i) {
+ meanNoTrack += noTrack[i];
+ meanTrack += track[i];
+ }
+ meanNoTrack /= Runs;
+ meanTrack /= Runs;
+
+ auto sqr = [](double val) { return val * val; };
+
+ for (size_t i = 0; i < Runs; ++i) {
+ stddevNoTrack += sqr(noTrack[i] - meanNoTrack);
+ stddevTrack += sqr(track[i] - meanTrack);
+ }
+ stddevNoTrack = sqrt(stddevNoTrack / (Runs - 1));
+ stddevTrack = sqrt(stddevTrack / (Runs - 1));
+
+ Cerr << "---- no track - mean: " << meanNoTrack << ", stddev: " << stddevNoTrack << Endl;
+ Cerr << "---- track - mean: " << meanTrack << ", stddev: " << stddevTrack << Endl;
+ Cerr << "---- tracking is slower by " << int((meanTrack / meanNoTrack - 1.0) * 100) << "%" << Endl;
+}
+
+#endif
+
+}
+
+}
+}
diff --git a/library/cpp/actors/core/mon.h b/library/cpp/actors/core/mon.h
new file mode 100644
index 0000000000..c450f2338e
--- /dev/null
+++ b/library/cpp/actors/core/mon.h
@@ -0,0 +1,234 @@
+#pragma once
+
+#include "events.h"
+#include "event_local.h"
+#include <library/cpp/monlib/service/monservice.h>
+#include <library/cpp/monlib/service/pages/mon_page.h>
+
+namespace NActors {
+ namespace NMon {
+ enum {
+ HttpInfo = EventSpaceBegin(NActors::TEvents::ES_MON),
+ HttpInfoRes,
+ RemoteHttpInfo,
+ RemoteHttpInfoRes,
+ RemoteJsonInfoRes,
+ RemoteBinaryInfoRes,
+ End
+ };
+
+ static_assert(End < EventSpaceEnd(NActors::TEvents::ES_MON), "expect End < EventSpaceEnd(NActors::TEvents::ES_MON)");
+
+ // request info from an actor in HTML format
+ struct TEvHttpInfo: public NActors::TEventLocal<TEvHttpInfo, HttpInfo> {
+ TEvHttpInfo(const NMonitoring::IMonHttpRequest& request, int subReqId = 0)
+ : Request(request)
+ , SubRequestId(subReqId)
+ {
+ }
+
+ TEvHttpInfo(const NMonitoring::IMonHttpRequest& request, const TString& userToken)
+ : Request(request)
+ , UserToken(userToken)
+ , SubRequestId(0)
+ {
+ }
+
+ const NMonitoring::IMonHttpRequest& Request;
+ TString UserToken; // built and serialized
+            // SubRequestId != 0 means the reply is assembled from multiple parts, and SubRequestId identifies this part
+ int SubRequestId;
+ };
+
+ // base class for HTTP info response
+ struct IEvHttpInfoRes: public NActors::TEventLocal<IEvHttpInfoRes, HttpInfoRes> {
+ enum EContentType {
+ Html,
+ Custom,
+ };
+
+ IEvHttpInfoRes() {
+ }
+
+ virtual ~IEvHttpInfoRes() {
+ }
+
+ virtual void Output(IOutputStream& out) const = 0;
+ virtual EContentType GetContentType() const = 0;
+ };
+
+ // Ready to output HTML in TString
+ struct TEvHttpInfoRes: public IEvHttpInfoRes {
+ TEvHttpInfoRes(const TString& answer, int subReqId = 0, EContentType contentType = Html)
+ : Answer(answer)
+ , SubRequestId(subReqId)
+ , ContentType(contentType)
+ {
+ }
+
+ void Output(IOutputStream& out) const override {
+ out << Answer;
+ }
+
+ EContentType GetContentType() const override {
+ return ContentType;
+ }
+
+ const TString Answer;
+ const int SubRequestId;
+ const EContentType ContentType;
+ };
+
+ struct TEvRemoteHttpInfo: public NActors::TEventBase<TEvRemoteHttpInfo, RemoteHttpInfo> {
+ TEvRemoteHttpInfo() {
+ }
+
+ TEvRemoteHttpInfo(const TString& query)
+ : Query(query)
+ {
+ }
+
+ TEvRemoteHttpInfo(const TString& query, HTTP_METHOD method)
+ : Query(query)
+ , Method(method)
+ {
+ }
+
+ TString Query;
+            HTTP_METHOD Method = HTTP_METHOD_UNDEFINED; // default-constructed events must not leave Method uninitialized
+
+ TString PathInfo() const {
+ const size_t pos = Query.find('?');
+ return (pos == TString::npos) ? TString() : Query.substr(0, pos);
+ }
+
+ TCgiParameters Cgi() const {
+ const size_t pos = Query.find('?');
+ return TCgiParameters((pos == TString::npos) ? TString() : Query.substr(pos + 1));
+ }
+
+ TString ToStringHeader() const override {
+ return "TEvRemoteHttpInfo";
+ }
+
+ bool SerializeToArcadiaStream(TChunkSerializer *serializer) const override {
+ return serializer->WriteString(&Query);
+ }
+
+ ui32 CalculateSerializedSize() const override {
+ return Query.size();
+ }
+
+ bool IsSerializable() const override {
+ return true;
+ }
+
+ static IEventBase* Load(TEventSerializedData* bufs) {
+ return new TEvRemoteHttpInfo(bufs->GetString());
+ }
+
+            HTTP_METHOD GetMethod() const {
+                return Method;
+            }
+ };
+
+ struct TEvRemoteHttpInfoRes: public NActors::TEventBase<TEvRemoteHttpInfoRes, RemoteHttpInfoRes> {
+ TEvRemoteHttpInfoRes() {
+ }
+
+ TEvRemoteHttpInfoRes(const TString& html)
+ : Html(html)
+ {
+ }
+
+ TString Html;
+
+ TString ToStringHeader() const override {
+ return "TEvRemoteHttpInfoRes";
+ }
+
+ bool SerializeToArcadiaStream(TChunkSerializer *serializer) const override {
+ return serializer->WriteString(&Html);
+ }
+
+ ui32 CalculateSerializedSize() const override {
+ return Html.size();
+ }
+
+ bool IsSerializable() const override {
+ return true;
+ }
+
+ static IEventBase* Load(TEventSerializedData* bufs) {
+ return new TEvRemoteHttpInfoRes(bufs->GetString());
+ }
+ };
+
+ struct TEvRemoteJsonInfoRes: public NActors::TEventBase<TEvRemoteJsonInfoRes, RemoteJsonInfoRes> {
+ TEvRemoteJsonInfoRes() {
+ }
+
+ TEvRemoteJsonInfoRes(const TString& json)
+ : Json(json)
+ {
+ }
+
+ TString Json;
+
+ TString ToStringHeader() const override {
+ return "TEvRemoteJsonInfoRes";
+ }
+
+ bool SerializeToArcadiaStream(TChunkSerializer *serializer) const override {
+ return serializer->WriteString(&Json);
+ }
+
+ ui32 CalculateSerializedSize() const override {
+ return Json.size();
+ }
+
+ bool IsSerializable() const override {
+ return true;
+ }
+
+ static IEventBase* Load(TEventSerializedData* bufs) {
+ return new TEvRemoteJsonInfoRes(bufs->GetString());
+ }
+ };
+
+ struct TEvRemoteBinaryInfoRes: public NActors::TEventBase<TEvRemoteBinaryInfoRes, RemoteBinaryInfoRes> {
+ TEvRemoteBinaryInfoRes() {
+ }
+
+ TEvRemoteBinaryInfoRes(const TString& blob)
+ : Blob(blob)
+ {
+ }
+
+ TString Blob;
+
+ TString ToStringHeader() const override {
+ return "TEvRemoteBinaryInfoRes";
+ }
+
+ bool SerializeToArcadiaStream(TChunkSerializer *serializer) const override {
+ return serializer->WriteString(&Blob);
+ }
+
+ ui32 CalculateSerializedSize() const override {
+ return Blob.size();
+ }
+
+ bool IsSerializable() const override {
+ return true;
+ }
+
+ static IEventBase* Load(TEventSerializedData* bufs) {
+ return new TEvRemoteBinaryInfoRes(bufs->GetString());
+ }
+ };
+
+ }
+
+}
diff --git a/library/cpp/actors/core/mon_stats.h b/library/cpp/actors/core/mon_stats.h
new file mode 100644
index 0000000000..d55552af0c
--- /dev/null
+++ b/library/cpp/actors/core/mon_stats.h
@@ -0,0 +1,147 @@
+#pragma once
+
+#include "defs.h"
+#include "actor.h"
+#include <library/cpp/monlib/metrics/histogram_snapshot.h>
+#include <util/system/hp_timer.h>
+
+namespace NActors {
+ struct TLogHistogram : public NMonitoring::IHistogramSnapshot {
+ TLogHistogram() {
+ memset(Buckets, 0, sizeof(Buckets));
+ }
+
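+        // log2 bucketing: bucket 0 counts samples with value <= 1, bucket i (i >= 1)
+        // counts values in (2^(i-1), 2^i]; this matches UpperBound() below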
+ inline void Add(ui64 val, ui64 inc = 1) {
+ size_t ind = 0;
+#if defined(__clang__) && __clang_major__ == 3 && __clang_minor__ == 7
+ asm volatile("" ::
+ : "memory");
+#endif
+ if (val > 1) {
+ ind = GetValueBitCount(val - 1);
+ }
+#if defined(__clang__) && __clang_major__ == 3 && __clang_minor__ == 7
+ asm volatile("" ::
+ : "memory");
+#endif
+ RelaxedStore(&TotalSamples, RelaxedLoad(&TotalSamples) + inc);
+ RelaxedStore(&Buckets[ind], RelaxedLoad(&Buckets[ind]) + inc);
+ }
+
+ void Aggregate(const TLogHistogram& other) {
+ const ui64 inc = RelaxedLoad(&other.TotalSamples);
+ RelaxedStore(&TotalSamples, RelaxedLoad(&TotalSamples) + inc);
+ for (size_t i = 0; i < Y_ARRAY_SIZE(Buckets); ++i) {
+ Buckets[i] += RelaxedLoad(&other.Buckets[i]);
+ }
+ }
+
+ // IHistogramSnapshot
+ ui32 Count() const override {
+ return Y_ARRAY_SIZE(Buckets);
+ }
+
+ NMonitoring::TBucketBound UpperBound(ui32 index) const override {
+ Y_ASSERT(index < Y_ARRAY_SIZE(Buckets));
+ if (index == 0) {
+ return 1;
+ }
+ return NMonitoring::TBucketBound(1ull << (index - 1)) * 2.0;
+ }
+
+ NMonitoring::TBucketValue Value(ui32 index) const override {
+ Y_ASSERT(index < Y_ARRAY_SIZE(Buckets));
+ return Buckets[index];
+ }
+
+ ui64 TotalSamples = 0;
+ ui64 Buckets[65];
+ };
+
+ struct TExecutorPoolStats {
+ ui64 MaxUtilizationTime = 0;
+ };
+
+ struct TExecutorThreadStats {
+ ui64 SentEvents = 0;
+ ui64 ReceivedEvents = 0;
+        ui64 PreemptedEvents = 0; // number of events that experienced hard preemption
+ ui64 NonDeliveredEvents = 0;
+ ui64 EmptyMailboxActivation = 0;
+        ui64 CpuNs = 0; // nanoseconds the thread was executing on CPU (accounts for preemption)
+ NHPTimer::STime ElapsedTicks = 0;
+ NHPTimer::STime ParkedTicks = 0;
+ NHPTimer::STime BlockedTicks = 0;
+ TLogHistogram ActivationTimeHistogram;
+ TLogHistogram EventDeliveryTimeHistogram;
+ TLogHistogram EventProcessingCountHistogram;
+ TLogHistogram EventProcessingTimeHistogram;
+ TVector<NHPTimer::STime> ElapsedTicksByActivity;
+ TVector<ui64> ReceivedEventsByActivity;
+ TVector<i64> ActorsAliveByActivity; // the sum should be positive, but per-thread might be negative
+ TVector<ui64> ScheduledEventsByActivity;
+ ui64 PoolActorRegistrations = 0;
+ ui64 PoolDestroyedActors = 0;
+ ui64 PoolAllocatedMailboxes = 0;
+ ui64 MailboxPushedOutBySoftPreemption = 0;
+ ui64 MailboxPushedOutByTime = 0;
+ ui64 MailboxPushedOutByEventCount = 0;
+
+        TExecutorThreadStats(size_t activityVecSize = 1) // must not be empty, since 0 is used as the default activity
+ : ElapsedTicksByActivity(activityVecSize)
+ , ReceivedEventsByActivity(activityVecSize)
+ , ActorsAliveByActivity(activityVecSize)
+ , ScheduledEventsByActivity(activityVecSize)
+ {}
+
+ template <typename T>
+ static void AggregateOne(TVector<T>& self, const TVector<T>& other) {
+ const size_t selfSize = self.size();
+ const size_t otherSize = other.size();
+ if (selfSize < otherSize)
+ self.resize(otherSize);
+ for (size_t at = 0; at < otherSize; ++at)
+ self[at] += RelaxedLoad(&other[at]);
+ }
+
+ void Aggregate(const TExecutorThreadStats& other) {
+ SentEvents += RelaxedLoad(&other.SentEvents);
+ ReceivedEvents += RelaxedLoad(&other.ReceivedEvents);
+ PreemptedEvents += RelaxedLoad(&other.PreemptedEvents);
+ NonDeliveredEvents += RelaxedLoad(&other.NonDeliveredEvents);
+ EmptyMailboxActivation += RelaxedLoad(&other.EmptyMailboxActivation);
+ CpuNs += RelaxedLoad(&other.CpuNs);
+ ElapsedTicks += RelaxedLoad(&other.ElapsedTicks);
+ ParkedTicks += RelaxedLoad(&other.ParkedTicks);
+ BlockedTicks += RelaxedLoad(&other.BlockedTicks);
+ MailboxPushedOutBySoftPreemption += RelaxedLoad(&other.MailboxPushedOutBySoftPreemption);
+ MailboxPushedOutByTime += RelaxedLoad(&other.MailboxPushedOutByTime);
+ MailboxPushedOutByEventCount += RelaxedLoad(&other.MailboxPushedOutByEventCount);
+
+ ActivationTimeHistogram.Aggregate(other.ActivationTimeHistogram);
+ EventDeliveryTimeHistogram.Aggregate(other.EventDeliveryTimeHistogram);
+ EventProcessingCountHistogram.Aggregate(other.EventProcessingCountHistogram);
+ EventProcessingTimeHistogram.Aggregate(other.EventProcessingTimeHistogram);
+
+ AggregateOne(ElapsedTicksByActivity, other.ElapsedTicksByActivity);
+ AggregateOne(ReceivedEventsByActivity, other.ReceivedEventsByActivity);
+ AggregateOne(ActorsAliveByActivity, other.ActorsAliveByActivity);
+ AggregateOne(ScheduledEventsByActivity, other.ScheduledEventsByActivity);
+
+ RelaxedStore(
+ &PoolActorRegistrations,
+ std::max(RelaxedLoad(&PoolActorRegistrations), RelaxedLoad(&other.PoolActorRegistrations)));
+ RelaxedStore(
+ &PoolDestroyedActors,
+ std::max(RelaxedLoad(&PoolDestroyedActors), RelaxedLoad(&other.PoolDestroyedActors)));
+ RelaxedStore(
+ &PoolAllocatedMailboxes,
+ std::max(RelaxedLoad(&PoolAllocatedMailboxes), RelaxedLoad(&other.PoolAllocatedMailboxes)));
+ }
+
+ size_t MaxActivityType() const {
+ return ActorsAliveByActivity.size();
+ }
+ };
+
+}
diff --git a/library/cpp/actors/core/monotonic.cpp b/library/cpp/actors/core/monotonic.cpp
new file mode 100644
index 0000000000..3465149dbe
--- /dev/null
+++ b/library/cpp/actors/core/monotonic.cpp
@@ -0,0 +1,23 @@
+#include "monotonic.h"
+
+#include <chrono>
+
+namespace NActors {
+
+ namespace {
+        // Unfortunately time_since_epoch() is sometimes negative on Wine.
+        // Remember the initial time point at program start and use offsets from it.
+ std::chrono::steady_clock::time_point MonotonicOffset = std::chrono::steady_clock::now();
+ }
+
+ ui64 GetMonotonicMicroSeconds() {
+ auto microseconds = std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::steady_clock::now() - MonotonicOffset).count();
+ // Steady clock is supposed to never jump backwards, but it's better to be safe in case of buggy implementations
+ if (Y_UNLIKELY(microseconds < 0)) {
+ microseconds = 0;
+ }
+ // Add one so we never return zero
+ return microseconds + 1;
+ }
+
+} // namespace NActors
diff --git a/library/cpp/actors/core/monotonic.h b/library/cpp/actors/core/monotonic.h
new file mode 100644
index 0000000000..6fceb91dbe
--- /dev/null
+++ b/library/cpp/actors/core/monotonic.h
@@ -0,0 +1,111 @@
+#pragma once
+
+#include <util/datetime/base.h>
+
+namespace NActors {
+
+ /**
+     * Returns current monotonic time in microseconds (never zero, so zero may be used as an "unset" marker)
+ */
+ ui64 GetMonotonicMicroSeconds();
+
+ /**
+ * Similar to TInstant, but measuring monotonic time
+ */
+ class TMonotonic : public TTimeBase<TMonotonic> {
+ using TBase = TTimeBase<TMonotonic>;
+
+ private:
+ constexpr explicit TMonotonic(TValue value) noexcept
+ : TBase(value)
+ { }
+
+ public:
+ constexpr TMonotonic() noexcept {
+ }
+
+ static constexpr TMonotonic FromValue(TValue value) noexcept {
+ return TMonotonic(value);
+ }
+
+ static inline TMonotonic Now() {
+ return TMonotonic::MicroSeconds(GetMonotonicMicroSeconds());
+ }
+
+ using TBase::Days;
+ using TBase::Hours;
+ using TBase::MicroSeconds;
+ using TBase::MilliSeconds;
+ using TBase::Minutes;
+ using TBase::Seconds;
+
+ static constexpr TMonotonic Max() noexcept {
+ return TMonotonic(::Max<ui64>());
+ }
+
+ static constexpr TMonotonic Zero() noexcept {
+ return TMonotonic();
+ }
+
+ static constexpr TMonotonic MicroSeconds(ui64 us) noexcept {
+ return TMonotonic(TInstant::MicroSeconds(us).GetValue());
+ }
+
+ static constexpr TMonotonic MilliSeconds(ui64 ms) noexcept {
+ return TMonotonic(TInstant::MilliSeconds(ms).GetValue());
+ }
+
+ static constexpr TMonotonic Seconds(ui64 s) noexcept {
+ return TMonotonic(TInstant::Seconds(s).GetValue());
+ }
+
+ static constexpr TMonotonic Minutes(ui64 m) noexcept {
+ return TMonotonic(TInstant::Minutes(m).GetValue());
+ }
+
+ static constexpr TMonotonic Hours(ui64 h) noexcept {
+ return TMonotonic(TInstant::Hours(h).GetValue());
+ }
+
+ static constexpr TMonotonic Days(ui64 d) noexcept {
+ return TMonotonic(TInstant::Days(d).GetValue());
+ }
+
+ template<class T>
+ inline TMonotonic& operator+=(const T& t) noexcept {
+ return (*this = (*this + t));
+ }
+
+ template<class T>
+ inline TMonotonic& operator-=(const T& t) noexcept {
+ return (*this = (*this - t));
+ }
+ };
+} // namespace NActors
+
+Y_DECLARE_PODTYPE(NActors::TMonotonic);
+
+template<>
+struct THash<NActors::TMonotonic> {
+ size_t operator()(const NActors::TMonotonic& key) const {
+ return THash<NActors::TMonotonic::TValue>()(key.GetValue());
+ }
+};
+
+namespace NActors {
+
+ constexpr TDuration operator-(const TMonotonic& l, const TMonotonic& r) {
+ return TInstant::FromValue(l.GetValue()) - TInstant::FromValue(r.GetValue());
+ }
+
+ constexpr TMonotonic operator+(const TMonotonic& l, const TDuration& r) {
+ TInstant result = TInstant::FromValue(l.GetValue()) + r;
+ return TMonotonic::FromValue(result.GetValue());
+ }
+
+ constexpr TMonotonic operator-(const TMonotonic& l, const TDuration& r) {
+ TInstant result = TInstant::FromValue(l.GetValue()) - r;
+ return TMonotonic::FromValue(result.GetValue());
+ }
+
+} // namespace NActors
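+// A minimal usage sketch (editorial example, not part of the original change):
+// TMonotonic values come from the process-local monotonic clock and support
+// the usual TDuration arithmetic defined above.
+//
+//     using namespace NActors;
+//
+//     TMonotonic start = TMonotonic::Now();
+//     TMonotonic deadline = start + TDuration::Seconds(5);
+//     // ... do some work ...
+//     if (TMonotonic::Now() - start > TDuration::MilliSeconds(100)) {
+//         // the work took longer than 100ms of monotonic time
+//     }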
diff --git a/library/cpp/actors/core/probes.cpp b/library/cpp/actors/core/probes.cpp
new file mode 100644
index 0000000000..7ace83e102
--- /dev/null
+++ b/library/cpp/actors/core/probes.cpp
@@ -0,0 +1,28 @@
+#include "probes.h"
+
+#include "actorsystem.h"
+
+#include <util/string/builder.h>
+
+LWTRACE_DEFINE_PROVIDER(ACTORLIB_PROVIDER);
+
+namespace NActors {
+ TVector<NLWTrace::TDashboard> LWTraceDashboards(TActorSystemSetup* setup) {
+ TVector<NLWTrace::TDashboard> result;
+
+ NLWTrace::TDashboard slowDash;
+ ui32 pools = setup->GetExecutorsCount();
+ size_t top = 30;
+ slowDash.SetName("ActorSystem slow events");
+ slowDash.SetDescription(TStringBuilder() << "TOP " << top << " slow event executions (>1M cycles) for every pool (refresh the page to update)");
+ for (ui32 pool = 0; pool < pools; pool++) {
+ auto* row = slowDash.AddRows();
+ auto* cell = row->AddCells();
+ cell->SetTitle(TStringBuilder() << pool << ":" << setup->GetPoolName(pool));
+ cell->SetUrl(TStringBuilder() << "?mode=log&id=.ACTORLIB_PROVIDER.SlowEvent.ppoolId=" << pool << "&s=eventMs&reverse=y&head=" << top);
+ }
+ result.push_back(slowDash);
+
+ return result;
+ }
+}
diff --git a/library/cpp/actors/core/probes.h b/library/cpp/actors/core/probes.h
new file mode 100644
index 0000000000..4912d6dd26
--- /dev/null
+++ b/library/cpp/actors/core/probes.h
@@ -0,0 +1,176 @@
+#pragma once
+
+#include <library/cpp/lwtrace/all.h>
+#include <util/generic/vector.h>
+
+#define LWACTORID(x) (x).RawX1(), (x).RawX2(), (x).NodeId(), (x).PoolID()
+#define LWTYPE_ACTORID ui64, ui64, ui32, ui32
+#define LWNAME_ACTORID(n) n "Raw1", n "Raw2", n "NodeId", n "PoolId"
+
+#define ACTORLIB_PROVIDER(PROBE, EVENT, GROUPS, TYPES, NAMES) \
+ PROBE(SlowEvent, GROUPS("ActorLibSlow"), \
+ TYPES(ui32, double, TString, TString, TString), \
+ NAMES("poolId", "eventMs", "eventType", "actorId", "actorType")) \
+ PROBE(EventSlowDelivery, GROUPS("ActorLibSlow"), \
+ TYPES(ui32, double, double, ui64, TString, TString, TString), \
+ NAMES("poolId", "deliveryMs", "sinceActivationMs", "eventProcessedBefore", "eventType", "actorId", "actorType")) \
+ PROBE(SlowActivation, GROUPS("ActorLibSlow"), \
+ TYPES(ui32, double), \
+ NAMES("poolId", "activationMs")) \
+ PROBE(SlowRegisterNew, GROUPS("ActorLibSlow"), \
+ TYPES(ui32, double), \
+ NAMES("poolId", "registerNewMs")) \
+ PROBE(SlowRegisterAdd, GROUPS("ActorLibSlow"), \
+ TYPES(ui32, double), \
+ NAMES("poolId", "registerAddMs")) \
+ PROBE(MailboxPushedOutBySoftPreemption, GROUPS("ActorLibMailbox", "ActorLibMailboxPushedOut"), \
+ TYPES(ui32, TString, ui32, TDuration, ui64, TString, TString), \
+ NAMES("poolId", "pool", "eventsProcessed", "procTimeMs", "workerId", "actorId", "actorType")) \
+ PROBE(MailboxPushedOutByTime, GROUPS("ActorLibMailbox", "ActorLibMailboxPushedOut"), \
+ TYPES(ui32, TString, ui32, TDuration, ui64, TString, TString), \
+ NAMES("poolId", "pool", "eventsProcessed", "procTimeMs", "workerId", "actorId", "actorType")) \
+ PROBE(MailboxPushedOutByEventCount, GROUPS("ActorLibMailbox", "ActorLibMailboxPushedOut"), \
+ TYPES(ui32, TString, ui32, TDuration, ui64, TString, TString), \
+ NAMES("poolId", "pool", "eventsProcessed", "procTimeMs", "workerId", "actorId", "actorType")) \
+ PROBE(MailboxEmpty, GROUPS("ActorLibMailbox"), \
+ TYPES(ui32, TString, ui32, TDuration, ui64, TString, TString), \
+ NAMES("poolId", "pool", "eventsProcessed", "procTimeMs", "workerId", "actorId", "actorType")) \
+ PROBE(ActivationBegin, GROUPS(), \
+ TYPES(ui32, ui32, ui32, double), \
+ NAMES("cpu", "poolId", "workerId", "expireMs")) \
+ PROBE(ActivationEnd, GROUPS(), \
+ TYPES(ui32, ui32, ui32), \
+ NAMES("cpu", "poolId", "workerId")) \
+ PROBE(ExecutorThreadStats, GROUPS("ActorLibStats"), \
+ TYPES(ui32, TString, ui64, ui64, ui64, double, double), \
+ NAMES("poolId", "pool", "workerId", "execCount", "readyActivationCount", "execMs", "nonExecMs")) \
+ PROBE(SlowICReadLoopAdjustSize, GROUPS("ActorLibSlowIC"), \
+ TYPES(double), \
+ NAMES("icReadLoopAdjustSizeMs")) \
+ PROBE(SlowICReadFromSocket, GROUPS("ActorLibSlowIC"), \
+ TYPES(double), \
+ NAMES("icReadFromSocketMs")) \
+ PROBE(SlowICReadLoopSend, GROUPS("ActorLibSlowIC"), \
+ TYPES(double), \
+ NAMES("icReadLoopSendMs")) \
+ PROBE(SlowICAllocPacketBuffer, GROUPS("ActorLibSlowIC"), \
+ TYPES(ui32, double), \
+ NAMES("peerId", "icAllocPacketBufferMs")) \
+ PROBE(SlowICFillSendingBuffer, GROUPS("ActorLibSlowIC"), \
+ TYPES(ui32, double), \
+ NAMES("peerId", "icFillSendingBufferMs")) \
+ PROBE(SlowICPushSentPackets, GROUPS("ActorLibSlowIC"), \
+ TYPES(ui32, double), \
+ NAMES("peerId", "icPushSentPacketsMs")) \
+ PROBE(SlowICPushSendQueue, GROUPS("ActorLibSlowIC"), \
+ TYPES(ui32, double), \
+ NAMES("peerId", "icPushSendQueueMs")) \
+ PROBE(SlowICWriteData, GROUPS("ActorLibSlowIC"), \
+ TYPES(ui32, double), \
+ NAMES("peerId", "icWriteDataMs")) \
+ PROBE(SlowICDropConfirmed, GROUPS("ActorLibSlowIC"), \
+ TYPES(ui32, double), \
+ NAMES("peerId", "icDropConfirmedMs")) \
+ PROBE(ActorsystemScheduler, GROUPS("Durations"), \
+ TYPES(ui64, ui64, ui32, ui32, ui64, ui64), \
+ NAMES("timeUs", "timerfd_expirations", "eventsGottenFromQueues", "eventsSent", \
+ "eventsInSendQueue", "eventSchedulingErrorUs")) \
+ PROBE(ForwardEvent, GROUPS("Orbit", "InterconnectSessionTCP"), \
+ TYPES(ui32, ui32, ui32, LWTYPE_ACTORID, LWTYPE_ACTORID, ui64, ui32), \
+ NAMES("peerId", "type", "flags", LWNAME_ACTORID("r"), LWNAME_ACTORID("s"), \
+ "cookie", "eventSerializedSize")) \
+ PROBE(EnqueueEvent, GROUPS("InterconnectSessionTCP"), \
+ TYPES(ui32, ui64, TDuration, ui16, ui64, ui64), \
+ NAMES("peerId", "numEventsInReadyChannels", "enqueueBlockedTotalMs", "channelId", "queueSizeInEvents", "queueSizeInBytes")) \
+ PROBE(SerializeToPacketBegin, GROUPS("InterconnectSessionTCP"), \
+ TYPES(ui32, ui16, ui64), \
+ NAMES("peerId", "channelId", "outputQueueSize")) \
+ PROBE(SerializeToPacketEnd, GROUPS("InterconnectSessionTCP"), \
+ TYPES(ui32, ui16, ui64, ui64), \
+ NAMES("peerId", "channelId", "outputQueueSize", "offsetInPacket")) \
+ PROBE(FillSendingBuffer, GROUPS("InterconnectSessionTCP"), \
+ TYPES(ui32, ui32, ui64, TDuration), \
+ NAMES("peerId", "taskBytesGenerated", "numEventsInReadyChannelsBehind", "fillBlockedTotalMs")) \
+ PROBE(PacketGenerated, GROUPS("InterconnectSessionTCP"), \
+ TYPES(ui32, ui64, ui64, ui64, ui64), \
+ NAMES("peerId", "bytesUnwritten", "inflightBytes", "packetsGenerated", "packetSize")) \
+ PROBE(PacketWrittenToSocket, GROUPS("InterconnectSessionTCP"), \
+ TYPES(ui32, ui64, bool, ui64, ui64, TDuration, int), \
+ NAMES("peerId", "packetsWrittenToSocket", "triedWriting", "packetDataSize", "bytesUnwritten", "writeBlockedTotalMs", "fd")) \
+ PROBE(GenerateTraffic, GROUPS("InterconnectSessionTCP"), \
+ TYPES(ui32, double, ui64, ui32, ui64), \
+ NAMES("peerId", "generateTrafficMs", "dataBytesSent", "generatedPackets", "generatedBytes")) \
+ PROBE(WriteToSocket, GROUPS("InterconnectSessionTCP"), \
+ TYPES(ui32, ui64, ui64, ui64, ui64, TDuration, int), \
+ NAMES("peerId", "bytesWritten", "packetsWritten", "packetsWrittenToSocket", "bytesUnwritten", "writeBlockedTotalMs", "fd")) \
+ PROBE(UpdateFromInputSession, GROUPS("InterconnectSessionTCP"), \
+ TYPES(ui32, double), \
+ NAMES("peerId", "pingMs")) \
+ PROBE(UnblockByDropConfirmed, GROUPS("InterconnectSessionTCP"), \
+ TYPES(ui32, double), \
+ NAMES("peerId", "updateDeliveryMs")) \
+ PROBE(DropConfirmed, GROUPS("InterconnectSessionTCP"), \
+ TYPES(ui32, ui64, ui64), \
+ NAMES("peerId", "droppedBytes", "inflightBytes")) \
+ PROBE(StartRam, GROUPS("InterconnectSessionTCP"), \
+ TYPES(ui32), \
+ NAMES("peerId")) \
+ PROBE(FinishRam, GROUPS("InterconnectSessionTCP"), \
+ TYPES(ui32, double), \
+ NAMES("peerId", "ramMs")) \
+ PROBE(SkipGenerateTraffic, GROUPS("InterconnectSessionTCP"), \
+ TYPES(ui32, double), \
+ NAMES("peerId", "elapsedSinceRamMs")) \
+ PROBE(StartBatching, GROUPS("InterconnectSessionTCP"), \
+ TYPES(ui32, double), \
+ NAMES("peerId", "batchPeriodMs")) \
+ PROBE(FinishBatching, GROUPS("InterconnectSessionTCP"), \
+ TYPES(ui32, double), \
+ NAMES("peerId", "finishBatchDeliveryMs")) \
+ PROBE(BlockedWrite, GROUPS("InterconnectSessionTCP"), \
+ TYPES(ui32, double, ui64), \
+ NAMES("peerId", "sendQueueSize", "writtenBytes")) \
+ PROBE(ReadyWrite, GROUPS("InterconnectSessionTCP"), \
+ TYPES(ui32, double, double), \
+ NAMES("peerId", "readyWriteDeliveryMs", "blockMs")) \
+ PROBE(EpollStartWaitIn, GROUPS("EpollThread"), \
+ TYPES(), \
+ NAMES()) \
+ PROBE(EpollFinishWaitIn, GROUPS("EpollThread"), \
+ TYPES(i32), \
+ NAMES("eventsCount")) \
+ PROBE(EpollWaitOut, GROUPS("EpollThread"), \
+ TYPES(i32), \
+ NAMES("eventsCount")) \
+ PROBE(EpollSendReadyRead, GROUPS("EpollThread"), \
+ TYPES(bool, bool, int), \
+ NAMES("hangup", "event", "fd")) \
+ PROBE(EpollSendReadyWrite, GROUPS("EpollThread"), \
+ TYPES(bool, bool, int), \
+ NAMES("hangup", "event", "fd")) \
+ PROBE(HardPreemption, GROUPS("UnitedWorker"), \
+ TYPES(ui32, ui32, ui32, ui32), \
+ NAMES("cpu", "prevPoolId", "prevWorkerId", "nextWorkerId")) \
+ PROBE(SetPreemptionTimer, GROUPS("UnitedWorker", "PreemptionTimer"), \
+ TYPES(ui32, ui32, int, double, double), \
+ NAMES("cpu", "workerId", "fd", "nowMs", "preemptMs")) \
+ PROBE(ResetPreemptionTimer, GROUPS("UnitedWorker", "PreemptionTimer"), \
+ TYPES(ui32, ui32, int, double, double), \
+ NAMES("cpu", "workerId", "fd", "nowMs", "preemptMs")) \
+ PROBE(SlowWorkerActionRace, GROUPS("UnitedWorker"), \
+ TYPES(ui32, ui32, ui64), \
+ NAMES("cpu", "poolId", "slowPoolsMask")) \
+ PROBE(PoolStats, GROUPS("PoolCpuBalancer"), \
+ TYPES(ui32, TString, ui64, ui8, ui8, double, double, double, ui64, ui64, ui64), \
+ NAMES("poolId", "pool", "currentCpus", "loadClass", "priority", "scaleFactor", "cpuIdle", "cpuLoad", "importance", "addImportance", "subImportance")) \
+ PROBE(MoveCpu, GROUPS("PoolCpuBalancer"), \
+ TYPES(ui32, ui64, TString, TString, ui32), \
+ NAMES("fromPoolId", "toPoolId", "fromPool", "toPool", "cpu")) \
+ /**/
+
+LWTRACE_DECLARE_PROVIDER(ACTORLIB_PROVIDER)
+
+namespace NActors {
+ struct TActorSystemSetup;
+ TVector<NLWTrace::TDashboard> LWTraceDashboards(TActorSystemSetup* setup);
+}
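+// A minimal usage sketch (editorial example, not part of the original change):
+// a translation unit enables the provider with LWTRACE_USING and then fires
+// probes by name, with arguments matching the TYPES/NAMES declared above.
+// The function and argument values below are placeholders.
+//
+//     LWTRACE_USING(ACTORLIB_PROVIDER);
+//
+//     void ReportSlowEvent(ui32 poolId, double eventMs) {
+//         LWPROBE(SlowEvent, poolId, eventMs, "TEvExample", "actorId", "TExampleActor");
+//     }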
diff --git a/library/cpp/actors/core/process_stats.cpp b/library/cpp/actors/core/process_stats.cpp
new file mode 100644
index 0000000000..0e1dbd0031
--- /dev/null
+++ b/library/cpp/actors/core/process_stats.cpp
@@ -0,0 +1,303 @@
+#include "actorsystem.h"
+#include "actor_bootstrapped.h"
+#include "hfunc.h"
+#include "process_stats.h"
+
+#include <library/cpp/monlib/dynamic_counters/counters.h>
+#include <library/cpp/monlib/metrics/metric_registry.h>
+
+#include <util/datetime/uptime.h>
+#include <util/system/defaults.h>
+#include <util/stream/file.h>
+#include <util/string/vector.h>
+#include <util/string/split.h>
+
+#ifndef _win_
+#include <sys/user.h>
+#include <sys/sysctl.h>
+#endif
+
+namespace NActors {
+#ifdef _linux_
+
+ namespace {
+ template <typename TVal>
+ static bool ExtractVal(const TString& str, const TString& name, TVal& res) {
+ if (!str.StartsWith(name))
+ return false;
+ size_t pos = name.size();
+ while (pos < str.size() && (str[pos] == ' ' || str[pos] == '\t')) {
+ pos++;
+ }
+ res = atol(str.data() + pos);
+ return true;
+ }
+
+ float TicksPerMillisec() {
+#ifdef _SC_CLK_TCK
+ return sysconf(_SC_CLK_TCK) / 1000.0;
+#else
+ return 1.f;
+#endif
+ }
+ }
+
+ bool TProcStat::Fill(pid_t pid) {
+ try {
+ TString strPid(ToString(pid));
+ TFileInput proc("/proc/" + strPid + "/status");
+ TString str;
+ while (proc.ReadLine(str)) {
+ if (ExtractVal(str, "VmRSS:", Rss))
+ continue;
+ if (ExtractVal(str, "voluntary_ctxt_switches:", VolCtxSwtch))
+ continue;
+ if (ExtractVal(str, "nonvoluntary_ctxt_switches:", NonvolCtxSwtch))
+ continue;
+ }
+ // Convert from kB to bytes
+ Rss *= 1024;
+
+ float tickPerMillisec = TicksPerMillisec();
+
+ TFileInput procStat("/proc/" + strPid + "/stat");
+ procStat.ReadLine(str);
+ if (!str.empty()) {
+ sscanf(str.data(),
+ "%d %*s %c %d %d %d %d %d %u %lu %lu "
+ "%lu %lu %lu %lu %ld %ld %ld %ld %ld "
+ "%ld %llu %lu %ld %lu",
+ &Pid, &State, &Ppid, &Pgrp, &Session, &TtyNr, &TPgid, &Flags, &MinFlt, &CMinFlt,
+ &MajFlt, &CMajFlt, &Utime, &Stime, &CUtime, &CStime, &Priority, &Nice, &NumThreads,
+ &ItRealValue, &StartTime, &Vsize, &RssPages, &RssLim);
+ Utime /= tickPerMillisec;
+ Stime /= tickPerMillisec;
+ CUtime /= tickPerMillisec;
+ CStime /= tickPerMillisec;
+ SystemUptime = ::Uptime();
+ Uptime = SystemUptime - TDuration::MilliSeconds(StartTime / TicksPerMillisec());
+ }
+
+ TFileInput statm("/proc/" + strPid + "/statm");
+ statm.ReadLine(str);
+ TVector<TString> fields;
+ StringSplitter(str).Split(' ').SkipEmpty().Collect(&fields);
+ if (fields.size() >= 7) {
+ ui64 resident = FromString<ui64>(fields[1]);
+ ui64 shared = FromString<ui64>(fields[2]);
+ if (PageSize == 0) {
+ PageSize = ObtainPageSize();
+ }
+ FileRss = shared * PageSize;
+ AnonRss = (resident - shared) * PageSize;
+ }
+
+ TFileInput cgroup("/proc/" + strPid + "/cgroup");
+ TString line;
+ TString memoryCGroup;
+ while (cgroup.ReadLine(line) > 0) {
+ StringSplitter(line).Split(':').Collect(&fields);
+ if (fields.size() > 2 && fields[1] == "memory") {
+ memoryCGroup = fields[2];
+ break;
+ }
+ }
+ if (!memoryCGroup.empty()) {
+ TFileInput limit("/sys/fs/cgroup/memory" + memoryCGroup + "/memory.limit_in_bytes");
+ if (limit.ReadLine(line) > 0) {
+ CGroupMemLim = FromString<ui64>(line);
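+ // Treat absurdly large limits (> 1TiB) as "no limit"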
+ if (CGroupMemLim > (1ULL << 40)) {
+ CGroupMemLim = 0;
+ }
+ }
+ }
+
+ } catch (...) {
+ return false;
+ }
+ return true;
+ }
+
+ long TProcStat::ObtainPageSize() {
+ long sz = sysconf(_SC_PAGESIZE);
+ return sz;
+ }
+
+#else
+
+ bool TProcStat::Fill(pid_t pid) {
+ Y_UNUSED(pid);
+ return false;
+ }
+
+ long TProcStat::ObtainPageSize() {
+ return 0;
+ }
+
+#endif
+
+namespace {
+ // Periodically collects process stats and exposes them as mon counters
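+ // (CRTP: TDerived provides UpdateCounters() and the StateWork state function;
+ // this base class drives the periodic wakeup loop and refreshes TProcStat
+ // before every update.)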
+ template <typename TDerived>
+ class TProcStatCollectingActor: public TActorBootstrapped<TProcStatCollectingActor<TDerived>> {
+ public:
+ static constexpr IActor::EActivityType ActorActivityType() {
+ return IActor::ACTORLIB_STATS;
+ }
+
+ TProcStatCollectingActor(TDuration interval)
+ : Interval(interval)
+ {
+ }
+
+ void Bootstrap(const TActorContext& ctx) {
+ ctx.Schedule(Interval, new TEvents::TEvWakeup());
+ Self()->Become(&TDerived::StateWork);
+ }
+
+ STFUNC(StateWork) {
+ switch (ev->GetTypeRewrite()) {
+ CFunc(TEvents::TSystem::Wakeup, Wakeup);
+ }
+ }
+
+ private:
+ void Wakeup(const TActorContext& ctx) {
+ Self()->UpdateCounters(ProcStat);
+ ctx.Schedule(Interval, new TEvents::TEvWakeup());
+ }
+
+ TDerived* Self() {
+ ProcStat.Fill(getpid());
+ return static_cast<TDerived*>(this);
+ }
+
+ private:
+ const TDuration Interval;
+ TProcStat ProcStat;
+ };
+
+ // Exposes the collected process stats via legacy TDynamicCounters
+ class TDynamicCounterCollector: public TProcStatCollectingActor<TDynamicCounterCollector> {
+ using TBase = TProcStatCollectingActor<TDynamicCounterCollector>;
+ public:
+ TDynamicCounterCollector(
+ ui32 intervalSeconds,
+ NMonitoring::TDynamicCounterPtr counters)
+ : TBase{TDuration::Seconds(intervalSeconds)}
+ {
+ ProcStatGroup = counters->GetSubgroup("counters", "utils");
+
+ VmSize = ProcStatGroup->GetCounter("Process/VmSize", false);
+ AnonRssSize = ProcStatGroup->GetCounter("Process/AnonRssSize", false);
+ FileRssSize = ProcStatGroup->GetCounter("Process/FileRssSize", false);
+ CGroupMemLimit = ProcStatGroup->GetCounter("Process/CGroupMemLimit", false);
+ UserTime = ProcStatGroup->GetCounter("Process/UserTime", true);
+ SysTime = ProcStatGroup->GetCounter("Process/SystemTime", true);
+ MinorPageFaults = ProcStatGroup->GetCounter("Process/MinorPageFaults", true);
+ MajorPageFaults = ProcStatGroup->GetCounter("Process/MajorPageFaults", true);
+ UptimeSeconds = ProcStatGroup->GetCounter("Process/UptimeSeconds", false);
+ NumThreads = ProcStatGroup->GetCounter("Process/NumThreads", false);
+ SystemUptimeSeconds = ProcStatGroup->GetCounter("System/UptimeSeconds", false);
+ }
+
+ void UpdateCounters(const TProcStat& procStat) {
+ *VmSize = procStat.Vsize;
+ *AnonRssSize = procStat.AnonRss;
+ *FileRssSize = procStat.FileRss;
+ if (procStat.CGroupMemLim) {
+ *CGroupMemLimit = procStat.CGroupMemLim;
+ }
+ *UserTime = procStat.Utime;
+ *SysTime = procStat.Stime;
+ *MinorPageFaults = procStat.MinFlt;
+ *MajorPageFaults = procStat.MajFlt;
+ *UptimeSeconds = procStat.Uptime.Seconds();
+ *NumThreads = procStat.NumThreads;
+ *SystemUptimeSeconds = procStat.SystemUptime.Seconds();
+ }
+
+ private:
+ NMonitoring::TDynamicCounterPtr ProcStatGroup;
+ NMonitoring::TDynamicCounters::TCounterPtr VmSize;
+ NMonitoring::TDynamicCounters::TCounterPtr AnonRssSize;
+ NMonitoring::TDynamicCounters::TCounterPtr FileRssSize;
+ NMonitoring::TDynamicCounters::TCounterPtr CGroupMemLimit;
+ NMonitoring::TDynamicCounters::TCounterPtr UserTime;
+ NMonitoring::TDynamicCounters::TCounterPtr SysTime;
+ NMonitoring::TDynamicCounters::TCounterPtr MinorPageFaults;
+ NMonitoring::TDynamicCounters::TCounterPtr MajorPageFaults;
+ NMonitoring::TDynamicCounters::TCounterPtr UptimeSeconds;
+ NMonitoring::TDynamicCounters::TCounterPtr NumThreads;
+ NMonitoring::TDynamicCounters::TCounterPtr SystemUptimeSeconds;
+ };
+
+
+ class TRegistryCollector: public TProcStatCollectingActor<TRegistryCollector> {
+ using TBase = TProcStatCollectingActor<TRegistryCollector>;
+ public:
+ TRegistryCollector(TDuration interval, NMonitoring::TMetricRegistry& registry)
+ : TBase{interval}
+ {
+ VmSize = registry.IntGauge({{"sensor", "process.VmSize"}});
+ AnonRssSize = registry.IntGauge({{"sensor", "process.AnonRssSize"}});
+ FileRssSize = registry.IntGauge({{"sensor", "process.FileRssSize"}});
+ CGroupMemLimit = registry.IntGauge({{"sensor", "process.CGroupMemLimit"}});
+ UptimeSeconds = registry.IntGauge({{"sensor", "process.UptimeSeconds"}});
+ NumThreads = registry.IntGauge({{"sensor", "process.NumThreads"}});
+ SystemUptimeSeconds = registry.IntGauge({{"sensor", "system.UptimeSeconds"}});
+
+ UserTime = registry.Rate({{"sensor", "process.UserTime"}});
+ SysTime = registry.Rate({{"sensor", "process.SystemTime"}});
+ MinorPageFaults = registry.Rate({{"sensor", "process.MinorPageFaults"}});
+ MajorPageFaults = registry.Rate({{"sensor", "process.MajorPageFaults"}});
+ }
+
+ void UpdateCounters(const TProcStat& procStat) {
+ VmSize->Set(procStat.Vsize);
+ AnonRssSize->Set(procStat.AnonRss);
+ FileRssSize->Set(procStat.FileRss);
+ CGroupMemLimit->Set(procStat.CGroupMemLim);
+ UptimeSeconds->Set(procStat.Uptime.Seconds());
+ NumThreads->Set(procStat.NumThreads);
+ SystemUptimeSeconds->Set(procStat.SystemUptime.Seconds());
+
+ // It is OK to reset and re-add the metric value here, because the
+ // mutation is performed in a single-threaded context
+
+ UserTime->Reset();
+ UserTime->Add(procStat.Utime);
+
+ SysTime->Reset();
+ SysTime->Add(procStat.Stime);
+
+ MinorPageFaults->Reset();
+ MinorPageFaults->Add(procStat.MinFlt);
+
+ MajorPageFaults->Reset();
+ MajorPageFaults->Add(procStat.MajFlt);
+ }
+
+ private:
+ NMonitoring::TIntGauge* VmSize;
+ NMonitoring::TIntGauge* AnonRssSize;
+ NMonitoring::TIntGauge* FileRssSize;
+ NMonitoring::TIntGauge* CGroupMemLimit;
+ NMonitoring::TRate* UserTime;
+ NMonitoring::TRate* SysTime;
+ NMonitoring::TRate* MinorPageFaults;
+ NMonitoring::TRate* MajorPageFaults;
+ NMonitoring::TIntGauge* UptimeSeconds;
+ NMonitoring::TIntGauge* NumThreads;
+ NMonitoring::TIntGauge* SystemUptimeSeconds;
+ };
+} // namespace
+
+ IActor* CreateProcStatCollector(ui32 intervalSec, NMonitoring::TDynamicCounterPtr counters) {
+ return new TDynamicCounterCollector(intervalSec, counters);
+ }
+
+ IActor* CreateProcStatCollector(TDuration interval, NMonitoring::TMetricRegistry& registry) {
+ return new TRegistryCollector(interval, registry);
+ }
+}
diff --git a/library/cpp/actors/core/process_stats.h b/library/cpp/actors/core/process_stats.h
new file mode 100644
index 0000000000..66346d0b5a
--- /dev/null
+++ b/library/cpp/actors/core/process_stats.h
@@ -0,0 +1,66 @@
+#pragma once
+
+#include "defs.h"
+#include "actor.h"
+
+#include <library/cpp/monlib/dynamic_counters/counters.h>
+
+namespace NMonitoring {
+ class TMetricRegistry;
+}
+
+namespace NActors {
+ struct TProcStat {
+ ui64 Rss;
+ ui64 VolCtxSwtch;
+ ui64 NonvolCtxSwtch;
+
+ int Pid;
+ char State;
+ int Ppid;
+ int Pgrp;
+ int Session;
+ int TtyNr;
+ int TPgid;
+ unsigned Flags;
+ unsigned long MinFlt;
+ unsigned long CMinFlt;
+ unsigned long MajFlt;
+ unsigned long CMajFlt;
+ unsigned long Utime;
+ unsigned long Stime;
+ long CUtime;
+ long CStime;
+ long Priority;
+ long Nice;
+ long NumThreads;
+ long ItRealValue;
+ // StartTime is measured from system boot
+ unsigned long long StartTime;
+ unsigned long Vsize;
+ long RssPages;
+ unsigned long RssLim;
+ ui64 FileRss;
+ ui64 AnonRss;
+ ui64 CGroupMemLim = 0;
+
+ TDuration Uptime;
+ TDuration SystemUptime;
+ // ...
+
+ TProcStat() {
+ Zero(*this);
+ Y_UNUSED(PageSize);
+ }
+
+ bool Fill(pid_t pid);
+
+ private:
+ long PageSize = 0;
+
+ long ObtainPageSize();
+ };
+
+ IActor* CreateProcStatCollector(ui32 intervalSec, NMonitoring::TDynamicCounterPtr counters);
+ IActor* CreateProcStatCollector(TDuration interval, NMonitoring::TMetricRegistry& registry);
+}
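+// A minimal usage sketch (editorial example, not part of the original change):
+// Fill() parses /proc on Linux and returns false elsewhere.
+//
+//     NActors::TProcStat stat;
+//     if (stat.Fill(getpid())) {           // only succeeds on Linux
+//         Cerr << "RSS bytes: " << stat.Rss << Endl;
+//         Cerr << "threads:   " << stat.NumThreads << Endl;
+//     }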
diff --git a/library/cpp/actors/core/scheduler_actor.cpp b/library/cpp/actors/core/scheduler_actor.cpp
new file mode 100644
index 0000000000..febc5e40dd
--- /dev/null
+++ b/library/cpp/actors/core/scheduler_actor.cpp
@@ -0,0 +1,279 @@
+#include "actor_bootstrapped.h"
+#include "hfunc.h"
+#include "probes.h"
+#include "scheduler_actor.h"
+#include "scheduler_queue.h"
+
+#include <library/cpp/actors/interconnect/poller_actor.h>
+#include <util/system/hp_timer.h>
+
+#ifdef __linux__
+#include <sys/timerfd.h>
+#include <errno.h>
+
+LWTRACE_USING(ACTORLIB_PROVIDER);
+
+namespace NActors {
+ class TTimerDescriptor: public TSharedDescriptor {
+ const int Descriptor;
+
+ public:
+ TTimerDescriptor()
+ : Descriptor(timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK))
+ {
+ Y_VERIFY(Descriptor != -1, "timerfd_create() failed with %s", strerror(errno));
+ }
+
+ ~TTimerDescriptor() override {
+ close(Descriptor);
+ }
+
+ int GetDescriptor() override {
+ return Descriptor;
+ }
+ };
+
+ class TSchedulerActor: public TActor<TSchedulerActor> {
+ const TSchedulerConfig Cfg;
+ TIntrusivePtr<TSharedDescriptor> TimerDescriptor;
+
+ TVector<NSchedulerQueue::TReader*> Readers;
+
+ TActorId PollerActor;
+ TPollerToken::TPtr PollerToken;
+
+ ui64 RealTime;
+ ui64 MonotonicTime;
+
+ ui64 ActiveTick;
+ typedef TMap<ui64, TAutoPtr<NSchedulerQueue::TQueueType>> TMomentMap; // intrasecond queues
+ typedef THashMap<ui64, TAutoPtr<TMomentMap>> TScheduleMap; // over-second schedule
+
+ TScheduleMap ScheduleMap;
+
+ THolder<NThreading::TLegacyFuture<void, false>> MainCycle;
+
+ static const ui64 IntrasecondThreshold = 1048576; // ~one second, in microseconds (2^20)
+ TAutoPtr<TMomentMap> ActiveSec;
+ volatile ui64* CurrentTimestamp = nullptr;
+ volatile ui64* CurrentMonotonic = nullptr;
+ TDeque<TAutoPtr<IEventHandle>> EventsToBeSent;
+
+ public:
+ static constexpr IActor::EActivityType ActorActivityType() {
+ return IActor::ACTOR_SYSTEM_SCHEDULER_ACTOR;
+ }
+
+ TSchedulerActor(const TSchedulerConfig& cfg)
+ : TActor(&TSchedulerActor::StateFunc)
+ , Cfg(cfg)
+ , TimerDescriptor(new TTimerDescriptor())
+ , PollerActor(MakePollerActorId())
+ {
+ Y_ASSERT(Cfg.ResolutionMicroseconds != 0);
+ Y_ASSERT(Cfg.ProgressThreshold != 0);
+ Become(&TSchedulerActor::StateFunc);
+ }
+
+ void Handle(TEvSchedulerInitialize::TPtr& ev, const TActorContext& ctx) {
+ const TEvSchedulerInitialize& evInitialize = *ev->Get();
+ Y_ASSERT(evInitialize.ScheduleReaders.size() != 0);
+ Readers.resize(evInitialize.ScheduleReaders.size());
+ Copy(evInitialize.ScheduleReaders.begin(), evInitialize.ScheduleReaders.end(), Readers.begin());
+
+ Y_ASSERT(evInitialize.CurrentTimestamp != nullptr);
+ CurrentTimestamp = evInitialize.CurrentTimestamp;
+
+ Y_ASSERT(evInitialize.CurrentMonotonic != nullptr);
+ CurrentMonotonic = evInitialize.CurrentMonotonic;
+
+ struct itimerspec new_time;
+ memset(&new_time, 0, sizeof(new_time));
+ new_time.it_value.tv_nsec = Cfg.ResolutionMicroseconds * 1000;
+ new_time.it_interval.tv_nsec = Cfg.ResolutionMicroseconds * 1000;
+ int ret = timerfd_settime(TimerDescriptor->GetDescriptor(), 0, &new_time, NULL);
+ Y_VERIFY(ret != -1, "timerfd_settime() failed with %s", strerror(errno));
+ const bool success = ctx.Send(PollerActor, new TEvPollerRegister(TimerDescriptor, SelfId(), {}));
+ Y_VERIFY(success);
+
+ RealTime = RelaxedLoad(CurrentTimestamp);
+ MonotonicTime = RelaxedLoad(CurrentMonotonic);
+
+ ActiveTick = AlignUp<ui64>(MonotonicTime, IntrasecondThreshold);
+ }
+
+ void Handle(TEvPollerRegisterResult::TPtr ev, const TActorContext& ctx) {
+ PollerToken = ev->Get()->PollerToken;
+ HandleSchedule(ctx);
+ }
+
+ void UpdateTime() {
+ RealTime = TInstant::Now().MicroSeconds();
+ MonotonicTime = Max(MonotonicTime, GetMonotonicMicroSeconds());
+ AtomicStore(CurrentTimestamp, RealTime);
+ AtomicStore(CurrentMonotonic, MonotonicTime);
+ }
+
+ void TryUpdateTime(NHPTimer::STime* lastTimeUpdate) {
+ NHPTimer::STime hpnow;
+ GetTimeFast(&hpnow);
+ const ui64 elapsedCycles = hpnow > *lastTimeUpdate ? hpnow - *lastTimeUpdate : 0;
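+ // IntrasecondThreshold (2^20) approximates the number of microseconds per
+ // second here, so the right-hand side is roughly ResolutionMicroseconds
+ // expressed in CPU cycles.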
+ if (elapsedCycles > Cfg.ResolutionMicroseconds * (NHPTimer::GetCyclesPerSecond() / IntrasecondThreshold)) {
+ UpdateTime();
+ GetTimeFast(lastTimeUpdate);
+ }
+ }
+
+ void HandleSchedule(const TActorContext& ctx) {
+ for (;;) {
+ NHPTimer::STime schedulingStart;
+ GetTimeFast(&schedulingStart);
+ NHPTimer::STime lastTimeUpdate = schedulingStart;
+
+ ui64 expired;
+ ssize_t bytesRead;
+ bytesRead = read(TimerDescriptor->GetDescriptor(), &expired, sizeof(expired));
+ if (bytesRead == -1) {
+ if (errno == EAGAIN) {
+ PollerToken->Request(true, false);
+ break;
+ } else if (errno == EINTR) {
+ continue;
+ }
+ }
+ Y_VERIFY(bytesRead == sizeof(expired), "Error while reading from timerfd, strerror# %s", strerror(errno));
+ UpdateTime();
+
+ ui32 eventsGottenFromQueues = 0;
+ // collect everything from queues
+ for (ui32 i = 0; i != Readers.size(); ++i) {
+ while (NSchedulerQueue::TEntry* x = Readers[i]->Pop()) {
+ const ui64 instant = AlignUp<ui64>(x->InstantMicroseconds, Cfg.ResolutionMicroseconds);
+ IEventHandle* const ev = x->Ev;
+ ISchedulerCookie* const cookie = x->Cookie;
+
+ // Checking whether the cookie is still valid here would likely hurt performance without significant memory savings
+
+ if (instant <= ActiveTick) {
+ if (!ActiveSec)
+ ActiveSec.Reset(new TMomentMap());
+ TAutoPtr<NSchedulerQueue::TQueueType>& queue = (*ActiveSec)[instant];
+ if (!queue)
+ queue.Reset(new NSchedulerQueue::TQueueType());
+ queue->Writer.Push(instant, ev, cookie);
+ } else {
+ const ui64 intrasecond = AlignUp<ui64>(instant, IntrasecondThreshold);
+ TAutoPtr<TMomentMap>& msec = ScheduleMap[intrasecond];
+ if (!msec)
+ msec.Reset(new TMomentMap());
+ TAutoPtr<NSchedulerQueue::TQueueType>& queue = (*msec)[instant];
+ if (!queue)
+ queue.Reset(new NSchedulerQueue::TQueueType());
+ queue->Writer.Push(instant, ev, cookie);
+ }
+ ++eventsGottenFromQueues;
+ TryUpdateTime(&lastTimeUpdate);
+ }
+ }
+
+ ui64 eventSchedulingErrorUs = 0;
+ // send everything triggered on schedule
+ for (;;) {
+ while (!!ActiveSec && !ActiveSec->empty()) {
+ TMomentMap::iterator it = ActiveSec->begin();
+ if (it->first <= MonotonicTime) {
+ if (NSchedulerQueue::TQueueType* q = it->second.Get()) {
+ while (NSchedulerQueue::TEntry* x = q->Reader.Pop()) {
+ Y_VERIFY_DEBUG(x->InstantMicroseconds <= ActiveTick);
+ if (eventSchedulingErrorUs == 0 && MonotonicTime > x->InstantMicroseconds) {
+ eventSchedulingErrorUs = MonotonicTime - x->InstantMicroseconds;
+ }
+ IEventHandle* ev = x->Ev;
+ ISchedulerCookie* cookie = x->Cookie;
+ if (cookie) {
+ if (cookie->Detach()) {
+ EventsToBeSent.push_back(ev);
+ } else {
+ delete ev;
+ }
+ } else {
+ EventsToBeSent.push_back(ev);
+ }
+ TryUpdateTime(&lastTimeUpdate);
+ }
+ }
+ ActiveSec->erase(it);
+ } else {
+ break;
+ }
+ }
+
+ if (ActiveTick <= MonotonicTime) {
+ Y_VERIFY_DEBUG(!ActiveSec || ActiveSec->empty());
+ ActiveSec.Destroy();
+ ActiveTick += IntrasecondThreshold;
+ TScheduleMap::iterator it = ScheduleMap.find(ActiveTick);
+ if (it != ScheduleMap.end()) {
+ ActiveSec = it->second;
+ ScheduleMap.erase(it);
+ }
+ continue;
+ }
+
+ // ok, if we got here then nothing is ready, so the send step is complete
+ break;
+ }
+
+ // Send all from buffer queue
+ const ui64 eventsToBeSentSize = EventsToBeSent.size();
+ ui32 sentCount = 0;
+ if (eventsToBeSentSize > Cfg.RelaxedSendThresholdEventsPerCycle) {
+ sentCount = Cfg.RelaxedSendPaceEventsPerCycle +
+ (eventsToBeSentSize - Cfg.RelaxedSendThresholdEventsPerCycle) / 2;
+ } else {
+ sentCount = Min(eventsToBeSentSize, Cfg.RelaxedSendPaceEventsPerCycle);
+ }
+ for (ui32 i = 0; i < sentCount; ++i) {
+ ctx.Send(EventsToBeSent.front());
+ EventsToBeSent.pop_front();
+ }
+
+ NHPTimer::STime hpnow;
+ GetTimeFast(&hpnow);
+ const ui64 processingTime = hpnow > schedulingStart ? hpnow - schedulingStart : 0;
+ const ui64 elapsedTimeMicroseconds = processingTime / (NHPTimer::GetCyclesPerSecond() / IntrasecondThreshold);
+ LWPROBE(ActorsystemScheduler, elapsedTimeMicroseconds, expired, eventsGottenFromQueues, sentCount,
+ eventsToBeSentSize, eventSchedulingErrorUs);
+ TryUpdateTime(&lastTimeUpdate);
+ }
+ }
+
+ STRICT_STFUNC(StateFunc,
+ HFunc(TEvSchedulerInitialize, Handle)
+ CFunc(TEvPollerReady::EventType, HandleSchedule)
+ CFunc(TEvents::TSystem::PoisonPill, Die)
+ HFunc(TEvPollerRegisterResult, Handle)
+ )
+ };
+
+ IActor* CreateSchedulerActor(const TSchedulerConfig& cfg) {
+ if (cfg.UseSchedulerActor) {
+ return new TSchedulerActor(cfg);
+ } else {
+ return nullptr;
+ }
+ }
+
+}
+
+#else // linux
+
+namespace NActors {
+ IActor* CreateSchedulerActor(const TSchedulerConfig& cfg) {
+ Y_UNUSED(cfg);
+ return nullptr;
+ }
+
+}
+
+#endif // linux
diff --git a/library/cpp/actors/core/scheduler_actor.h b/library/cpp/actors/core/scheduler_actor.h
new file mode 100644
index 0000000000..c2c561b43d
--- /dev/null
+++ b/library/cpp/actors/core/scheduler_actor.h
@@ -0,0 +1,29 @@
+#pragma once
+
+#include "actor.h"
+#include "event_local.h"
+#include "events.h"
+#include "scheduler_basic.h"
+
+namespace NActors {
+ struct TEvSchedulerInitialize : TEventLocal<TEvSchedulerInitialize, TEvents::TSystem::Bootstrap> {
+ TVector<NSchedulerQueue::TReader*> ScheduleReaders;
+ volatile ui64* CurrentTimestamp;
+ volatile ui64* CurrentMonotonic;
+
+ TEvSchedulerInitialize(const TVector<NSchedulerQueue::TReader*>& scheduleReaders, volatile ui64* currentTimestamp, volatile ui64* currentMonotonic)
+ : ScheduleReaders(scheduleReaders)
+ , CurrentTimestamp(currentTimestamp)
+ , CurrentMonotonic(currentMonotonic)
+ {
+ }
+ };
+
+ IActor* CreateSchedulerActor(const TSchedulerConfig& cfg);
+
+ inline TActorId MakeSchedulerActorId() {
+ char x[12] = {'s', 'c', 'h', 'e', 'd', 'u', 'l', 'e', 'r', 's', 'e', 'r'};
+ return TActorId(0, TStringBuf(x, 12));
+ }
+
+}
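+// A minimal wiring sketch (editorial example, not part of the original change):
+// the scheduler actor is optional and is only created when the config asks for
+// it; see scheduler_actor_ut.cpp below for the full setup.
+//
+//     if (IActor* scheduler = CreateSchedulerActor(TSchedulerConfig())) {
+//         setup->LocalServices.emplace_back(
+//             MakeSchedulerActorId(),
+//             TActorSetupCmd(scheduler, TMailboxType::ReadAsFilled, 0));
+//     }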
diff --git a/library/cpp/actors/core/scheduler_actor_ut.cpp b/library/cpp/actors/core/scheduler_actor_ut.cpp
new file mode 100644
index 0000000000..09b7369d36
--- /dev/null
+++ b/library/cpp/actors/core/scheduler_actor_ut.cpp
@@ -0,0 +1,100 @@
+#include "actor_coroutine.h"
+#include "actorsystem.h"
+#include "executor_pool_basic.h"
+#include "scheduler_actor.h"
+#include "scheduler_basic.h"
+#include "events.h"
+#include "event_local.h"
+#include "hfunc.h"
+#include <library/cpp/actors/interconnect/poller_actor.h>
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <util/system/sanitizers.h>
+
+using namespace NActors;
+
+Y_UNIT_TEST_SUITE(SchedulerActor) {
+ class TTestActor: public TActorBootstrapped<TTestActor> {
+ TManualEvent& DoneEvent;
+ TAtomic& EventsProcessed;
+ TInstant LastWakeup;
+ const TAtomicBase EventsTotalCount;
+ const TDuration ScheduleDelta;
+
+ public:
+ TTestActor(TManualEvent& doneEvent, TAtomic& eventsProcessed, TAtomicBase eventsTotalCount, ui32 scheduleDeltaMs)
+ : DoneEvent(doneEvent)
+ , EventsProcessed(eventsProcessed)
+ , EventsTotalCount(eventsTotalCount)
+ , ScheduleDelta(TDuration::MilliSeconds(scheduleDeltaMs))
+ {
+ }
+
+ void Bootstrap(const TActorContext& ctx) {
+ LastWakeup = ctx.Now();
+ Become(&TThis::StateFunc);
+ ctx.Schedule(ScheduleDelta, new TEvents::TEvWakeup());
+ }
+
+ void Handle(TEvents::TEvWakeup::TPtr& /*ev*/, const TActorContext& ctx) {
+ const TInstant now = ctx.Now();
+ UNIT_ASSERT(now - LastWakeup >= ScheduleDelta);
+ LastWakeup = now;
+
+ if (AtomicIncrement(EventsProcessed) == EventsTotalCount) {
+ DoneEvent.Signal();
+ } else {
+ ctx.Schedule(ScheduleDelta, new TEvents::TEvWakeup());
+ }
+ }
+
+ STRICT_STFUNC(StateFunc, {HFunc(TEvents::TEvWakeup, Handle)})
+ };
+
+ void Test(TAtomicBase eventsTotalCount, ui32 scheduleDeltaMs) {
+ THolder<TActorSystemSetup> setup = MakeHolder<TActorSystemSetup>();
+ setup->NodeId = 0;
+ setup->ExecutorsCount = 1;
+ setup->Executors.Reset(new TAutoPtr<IExecutorPool>[setup->ExecutorsCount]);
+ for (ui32 i = 0; i < setup->ExecutorsCount; ++i) {
+ setup->Executors[i] = new TBasicExecutorPool(i, 5, 10, "basic");
+ }
+ // create the poller actor (if the platform supports it)
+ TActorId pollerActorId;
+ if (IActor* poller = CreatePollerActor()) {
+ pollerActorId = MakePollerActorId();
+ setup->LocalServices.emplace_back(pollerActorId, TActorSetupCmd(poller, TMailboxType::ReadAsFilled, 0));
+ }
+ TActorId schedulerActorId;
+ if (IActor* schedulerActor = CreateSchedulerActor(TSchedulerConfig())) {
+ schedulerActorId = MakeSchedulerActorId();
+ setup->LocalServices.emplace_back(schedulerActorId, TActorSetupCmd(schedulerActor, TMailboxType::ReadAsFilled, 0));
+ }
+ setup->Scheduler = CreateSchedulerThread(TSchedulerConfig());
+
+ TActorSystem actorSystem(setup);
+
+ actorSystem.Start();
+
+ TManualEvent doneEvent;
+ TAtomic eventsProcessed = 0;
+ actorSystem.Register(new TTestActor(doneEvent, eventsProcessed, eventsTotalCount, scheduleDeltaMs));
+ doneEvent.WaitI();
+
+ UNIT_ASSERT(AtomicGet(eventsProcessed) == eventsTotalCount);
+
+ actorSystem.Stop();
+ }
+
+ Y_UNIT_TEST(LongEvents) {
+ Test(10, 500);
+ }
+
+ Y_UNIT_TEST(MediumEvents) {
+ Test(100, 50);
+ }
+
+ Y_UNIT_TEST(QuickEvents) {
+ Test(1000, 5);
+ }
+}
diff --git a/library/cpp/actors/core/scheduler_basic.cpp b/library/cpp/actors/core/scheduler_basic.cpp
new file mode 100644
index 0000000000..fba200e16b
--- /dev/null
+++ b/library/cpp/actors/core/scheduler_basic.cpp
@@ -0,0 +1,274 @@
+#include "scheduler_basic.h"
+#include "scheduler_queue.h"
+
+#include <library/cpp/actors/util/datetime.h>
+#include <library/cpp/actors/util/thread.h>
+
+#ifdef BALLOC
+#include <library/cpp/balloc/optional/operators.h>
+#endif
+
+namespace NActors {
+
+ struct TBasicSchedulerThread::TMonCounters {
+ NMonitoring::TDynamicCounters::TCounterPtr TimeDelayMs;
+ NMonitoring::TDynamicCounters::TCounterPtr QueueSize;
+ NMonitoring::TDynamicCounters::TCounterPtr EventsSent;
+ NMonitoring::TDynamicCounters::TCounterPtr EventsDropped;
+ NMonitoring::TDynamicCounters::TCounterPtr EventsAdded;
+ NMonitoring::TDynamicCounters::TCounterPtr Iterations;
+ NMonitoring::TDynamicCounters::TCounterPtr Sleeps;
+ NMonitoring::TDynamicCounters::TCounterPtr ElapsedMicrosec;
+
+ TMonCounters(const NMonitoring::TDynamicCounterPtr& counters)
+ : TimeDelayMs(counters->GetCounter("Scheduler/TimeDelayMs", false))
+ , QueueSize(counters->GetCounter("Scheduler/QueueSize", false))
+ , EventsSent(counters->GetCounter("Scheduler/EventsSent", true))
+ , EventsDropped(counters->GetCounter("Scheduler/EventsDropped", true))
+ , EventsAdded(counters->GetCounter("Scheduler/EventsAdded", true))
+ , Iterations(counters->GetCounter("Scheduler/Iterations", true))
+ , Sleeps(counters->GetCounter("Scheduler/Sleeps", true))
+ , ElapsedMicrosec(counters->GetCounter("Scheduler/ElapsedMicrosec", true))
+ { }
+ };
+
+ TBasicSchedulerThread::TBasicSchedulerThread(const TSchedulerConfig& config)
+ : Config(config)
+ , MonCounters(Config.MonCounters ? new TMonCounters(Config.MonCounters) : nullptr)
+ , ActorSystem(nullptr)
+ , CurrentTimestamp(nullptr)
+ , CurrentMonotonic(nullptr)
+ , TotalReaders(0)
+ , StopFlag(false)
+ , ScheduleMap(3600)
+ {
+ Y_VERIFY(!Config.UseSchedulerActor, "Cannot create scheduler thread because Config.UseSchedulerActor# true");
+ }
+
+ TBasicSchedulerThread::~TBasicSchedulerThread() {
+ Y_VERIFY(!MainCycle);
+ }
+
+ void TBasicSchedulerThread::CycleFunc() {
+#ifdef BALLOC
+ ThreadDisableBalloc();
+#endif
+ ::SetCurrentThreadName("Scheduler");
+
+ ui64 currentMonotonic = RelaxedLoad(CurrentMonotonic);
+ ui64 throttledMonotonic = currentMonotonic;
+
+ ui64 activeTick = AlignUp<ui64>(throttledMonotonic, IntrasecondThreshold);
+ TAutoPtr<TMomentMap> activeSec;
+
+ NHPTimer::STime hpprev = GetCycleCountFast();
+ ui64 nextTimestamp = TInstant::Now().MicroSeconds();
+ ui64 nextMonotonic = Max(currentMonotonic, GetMonotonicMicroSeconds());
+
+ while (!AtomicLoad(&StopFlag)) {
+ {
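+ // Throttle how fast the published monotonic time may advance: after a
+ // long stall, throttledMonotonic catches up by at most `threshold` per
+ // iteration, so due events are drained gradually instead of in one burst.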
+ const ui64 delta = nextMonotonic - throttledMonotonic;
+ const ui64 elapsedDelta = nextMonotonic - currentMonotonic;
+ const ui64 threshold = Max(Min(Config.ProgressThreshold, 2 * elapsedDelta), ui64(1));
+
+ throttledMonotonic = (delta > threshold) ? throttledMonotonic + threshold : nextMonotonic;
+
+ if (MonCounters) {
+ *MonCounters->TimeDelayMs = (nextMonotonic - throttledMonotonic) / 1000;
+ }
+ }
+ AtomicStore(CurrentTimestamp, nextTimestamp);
+ AtomicStore(CurrentMonotonic, nextMonotonic);
+ currentMonotonic = nextMonotonic;
+
+ if (MonCounters) {
+ ++*MonCounters->Iterations;
+ }
+
+ bool somethingDone = false;
+
+ // first step - send everything triggered on schedule
+ ui64 eventsSent = 0;
+ ui64 eventsDropped = 0;
+ for (;;) {
+ while (!!activeSec && !activeSec->empty()) {
+ TMomentMap::iterator it = activeSec->begin();
+ if (it->first <= throttledMonotonic) {
+ if (NSchedulerQueue::TQueueType* q = it->second.Get()) {
+ while (NSchedulerQueue::TEntry* x = q->Reader.Pop()) {
+ somethingDone = true;
+ Y_VERIFY_DEBUG(x->InstantMicroseconds <= activeTick);
+ IEventHandle* ev = x->Ev;
+ ISchedulerCookie* cookie = x->Cookie;
+ // TODO: lazy send with backoff queue to not hang over contended mailboxes
+ if (cookie) {
+ if (cookie->Detach()) {
+ ActorSystem->Send(ev);
+ ++eventsSent;
+ } else {
+ delete ev;
+ ++eventsDropped;
+ }
+ } else {
+ ActorSystem->Send(ev);
+ ++eventsSent;
+ }
+ }
+ }
+ activeSec->erase(it);
+ } else
+ break;
+ }
+
+ if (activeTick <= throttledMonotonic) {
+ Y_VERIFY_DEBUG(!activeSec || activeSec->empty());
+ activeSec.Destroy();
+ activeTick += IntrasecondThreshold;
+ TScheduleMap::iterator it = ScheduleMap.find(activeTick);
+ if (it != ScheduleMap.end()) {
+ activeSec = it->second;
+ ScheduleMap.erase(it);
+ }
+ continue;
+ }
+
+ // ok, if we got here then nothing is ready, so the send step is complete
+ break;
+ }
+
+ // second step - collect everything from queues
+
+ ui64 eventsAdded = 0;
+ for (ui32 i = 0; i != TotalReaders; ++i) {
+ while (NSchedulerQueue::TEntry* x = Readers[i]->Pop()) {
+ somethingDone = true;
+ const ui64 instant = AlignUp<ui64>(x->InstantMicroseconds, Config.ResolutionMicroseconds);
+ IEventHandle* const ev = x->Ev;
+ ISchedulerCookie* const cookie = x->Cookie;
+
+ // Checking whether the cookie is still valid here would likely hurt performance without significant memory savings
+
+ if (instant <= activeTick) {
+ if (!activeSec)
+ activeSec.Reset(new TMomentMap());
+ TAutoPtr<NSchedulerQueue::TQueueType>& queue = (*activeSec)[instant];
+ if (!queue)
+ queue.Reset(new NSchedulerQueue::TQueueType());
+ queue->Writer.Push(instant, ev, cookie);
+ } else {
+ const ui64 intrasecond = AlignUp<ui64>(instant, IntrasecondThreshold);
+ TAutoPtr<TMomentMap>& msec = ScheduleMap[intrasecond];
+ if (!msec)
+ msec.Reset(new TMomentMap());
+ TAutoPtr<NSchedulerQueue::TQueueType>& queue = (*msec)[instant];
+ if (!queue)
+ queue.Reset(new NSchedulerQueue::TQueueType());
+ queue->Writer.Push(instant, ev, cookie);
+ }
+
+ ++eventsAdded;
+ }
+ }
+
+ NHPTimer::STime hpnow = GetCycleCountFast();
+
+ if (MonCounters) {
+ *MonCounters->QueueSize -= eventsSent + eventsDropped;
+ *MonCounters->QueueSize += eventsAdded;
+ *MonCounters->EventsSent += eventsSent;
+ *MonCounters->EventsDropped += eventsDropped;
+ *MonCounters->EventsAdded += eventsAdded;
+ *MonCounters->ElapsedMicrosec += NHPTimer::GetSeconds(hpnow - hpprev) * 1000000;
+ }
+
+ hpprev = hpnow;
+ nextTimestamp = TInstant::Now().MicroSeconds();
+ nextMonotonic = Max(currentMonotonic, GetMonotonicMicroSeconds());
+
+ // step complete; if nothing was done - sleep
+ if (!somethingDone) {
+ const ui64 nextInstant = AlignDown<ui64>(throttledMonotonic + Config.ResolutionMicroseconds, Config.ResolutionMicroseconds);
+ if (nextMonotonic >= nextInstant) // already in next time-slice
+ continue;
+
+ const ui64 delta = nextInstant - nextMonotonic;
+ if (delta < Config.SpinThreshold) // not so much time left, just spin
+ continue;
+
+ if (MonCounters) {
+ ++*MonCounters->Sleeps;
+ }
+
+ NanoSleep(delta * 1000); // ok, looks like we should sleep a bit.
+
+ // Don't count sleep in elapsed microseconds
+ hpprev = GetCycleCountFast();
+ nextTimestamp = TInstant::Now().MicroSeconds();
+ nextMonotonic = Max(currentMonotonic, GetMonotonicMicroSeconds());
+ }
+ }
+ // ok, die!
+ }
+
+ void TBasicSchedulerThread::Prepare(TActorSystem* actorSystem, volatile ui64* currentTimestamp, volatile ui64* currentMonotonic) {
+ ActorSystem = actorSystem;
+ CurrentTimestamp = currentTimestamp;
+ CurrentMonotonic = currentMonotonic;
+ *CurrentTimestamp = TInstant::Now().MicroSeconds();
+ *CurrentMonotonic = GetMonotonicMicroSeconds();
+ }
+
+ void TBasicSchedulerThread::PrepareSchedules(NSchedulerQueue::TReader** readers, ui32 scheduleReadersCount) {
+ Y_VERIFY(scheduleReadersCount > 0);
+ TotalReaders = scheduleReadersCount;
+ Readers.Reset(new NSchedulerQueue::TReader*[scheduleReadersCount]);
+ Copy(readers, readers + scheduleReadersCount, Readers.Get());
+ }
+
+ void TBasicSchedulerThread::PrepareStart() {
+ // Called after actor system is initialized, but before executor threads
+ // are started, giving us a chance to update current timestamp with a
+ // more recent value, taking initialization time into account. This is
+ // safe to do, since scheduler thread is not started yet, so no other
+ // threads are updating time concurrently.
+ AtomicStore(CurrentTimestamp, TInstant::Now().MicroSeconds());
+ AtomicStore(CurrentMonotonic, Max(RelaxedLoad(CurrentMonotonic), GetMonotonicMicroSeconds()));
+ }
+
+ void TBasicSchedulerThread::Start() {
+ MainCycle.Reset(new NThreading::TLegacyFuture<void, false>(std::bind(&TBasicSchedulerThread::CycleFunc, this)));
+ }
+
+ void TBasicSchedulerThread::PrepareStop() {
+ AtomicStore(&StopFlag, true);
+ }
+
+ void TBasicSchedulerThread::Stop() {
+ MainCycle->Get();
+ MainCycle.Destroy();
+ }
+
+}
+
+#ifdef __linux__
+
+namespace NActors {
+ ISchedulerThread* CreateSchedulerThread(const TSchedulerConfig& config) {
+ if (config.UseSchedulerActor) {
+ return new TMockSchedulerThread();
+ } else {
+ return new TBasicSchedulerThread(config);
+ }
+ }
+
+}
+
+#else // __linux__
+
+namespace NActors {
+ ISchedulerThread* CreateSchedulerThread(const TSchedulerConfig& config) {
+ return new TBasicSchedulerThread(config);
+ }
+}
+
+#endif // __linux__
diff --git a/library/cpp/actors/core/scheduler_basic.h b/library/cpp/actors/core/scheduler_basic.h
new file mode 100644
index 0000000000..2ccde39235
--- /dev/null
+++ b/library/cpp/actors/core/scheduler_basic.h
@@ -0,0 +1,81 @@
+#pragma once
+
+#include "actorsystem.h"
+#include "monotonic.h"
+#include "scheduler_queue.h"
+#include <library/cpp/actors/util/queue_chunk.h>
+#include <library/cpp/threading/future/legacy_future.h>
+#include <util/generic/hash.h>
+#include <util/generic/map.h>
+
+namespace NActors {
+
+ class TBasicSchedulerThread: public ISchedulerThread {
+ // TODO: replace with NUMA-local threads and per-thread schedules
+ const TSchedulerConfig Config;
+
+ struct TMonCounters;
+ const THolder<TMonCounters> MonCounters;
+
+ TActorSystem* ActorSystem;
+ volatile ui64* CurrentTimestamp;
+ volatile ui64* CurrentMonotonic;
+
+ ui32 TotalReaders;
+ TArrayHolder<NSchedulerQueue::TReader*> Readers;
+
+ volatile bool StopFlag;
+
+ typedef TMap<ui64, TAutoPtr<NSchedulerQueue::TQueueType>> TMomentMap; // intrasecond queues
+ typedef THashMap<ui64, TAutoPtr<TMomentMap>> TScheduleMap; // over-second schedule
+
+ TScheduleMap ScheduleMap;
+
+ THolder<NThreading::TLegacyFuture<void, false>> MainCycle;
+
+ static const ui64 IntrasecondThreshold = 1048576; // ~one second, in microseconds (2^20)
+
+ void CycleFunc();
+
+ public:
+ TBasicSchedulerThread(const TSchedulerConfig& config = TSchedulerConfig());
+ ~TBasicSchedulerThread();
+
+ void Prepare(TActorSystem* actorSystem, volatile ui64* currentTimestamp, volatile ui64* currentMonotonic) override;
+ void PrepareSchedules(NSchedulerQueue::TReader** readers, ui32 scheduleReadersCount) override;
+
+ void PrepareStart() override;
+ void Start() override;
+ void PrepareStop() override;
+ void Stop() override;
+ };
+
+ class TMockSchedulerThread: public ISchedulerThread {
+ public:
+ virtual ~TMockSchedulerThread() override {
+ }
+
+ void Prepare(TActorSystem* actorSystem, volatile ui64* currentTimestamp, volatile ui64* currentMonotonic) override {
+ Y_UNUSED(actorSystem);
+ *currentTimestamp = TInstant::Now().MicroSeconds();
+ *currentMonotonic = GetMonotonicMicroSeconds();
+ }
+
+ void PrepareSchedules(NSchedulerQueue::TReader** readers, ui32 scheduleReadersCount) override {
+ Y_UNUSED(readers);
+ Y_UNUSED(scheduleReadersCount);
+ }
+
+ void Start() override {
+ }
+
+ void PrepareStop() override {
+ }
+
+ void Stop() override {
+ }
+ };
+
+ ISchedulerThread* CreateSchedulerThread(const TSchedulerConfig& cfg);
+
+}
diff --git a/library/cpp/actors/core/scheduler_cookie.cpp b/library/cpp/actors/core/scheduler_cookie.cpp
new file mode 100644
index 0000000000..0fa6f543a7
--- /dev/null
+++ b/library/cpp/actors/core/scheduler_cookie.cpp
@@ -0,0 +1,84 @@
+#include "scheduler_cookie.h"
+
+namespace NActors {
+ class TSchedulerCookie2Way: public ISchedulerCookie {
+ TAtomic Value;
+
+ public:
+ TSchedulerCookie2Way()
+ : Value(2)
+ {
+ }
+
+ bool IsArmed() noexcept override {
+ return (AtomicGet(Value) == 2);
+ }
+
+ bool Detach() noexcept override {
+ const ui64 x = AtomicDecrement(Value);
+ if (x == 1)
+ return true;
+
+ if (x == 0) {
+ delete this;
+ return false;
+ }
+
+ Y_FAIL();
+ }
+
+ bool DetachEvent() noexcept override {
+ Y_FAIL();
+ }
+ };
+
+ ISchedulerCookie* ISchedulerCookie::Make2Way() {
+ return new TSchedulerCookie2Way();
+ }
+
+ class TSchedulerCookie3Way: public ISchedulerCookie {
+ TAtomic Value;
+
+ public:
+ TSchedulerCookie3Way()
+ : Value(3)
+ {
+ }
+
+ bool IsArmed() noexcept override {
+ return (AtomicGet(Value) == 3);
+ }
+
+ bool Detach() noexcept override {
+ const ui64 x = AtomicDecrement(Value);
+ if (x == 2)
+ return true;
+ if (x == 1)
+ return false;
+ if (x == 0) {
+ delete this;
+ return false;
+ }
+
+ Y_FAIL();
+ }
+
+ bool DetachEvent() noexcept override {
+ const ui64 x = AtomicDecrement(Value);
+ if (x == 2)
+ return false;
+ if (x == 1)
+ return true;
+ if (x == 0) {
+ delete this;
+ return false;
+ }
+
+ Y_FAIL();
+ }
+ };
+
+ ISchedulerCookie* ISchedulerCookie::Make3Way() {
+ return new TSchedulerCookie3Way();
+ }
+}
diff --git a/library/cpp/actors/core/scheduler_cookie.h b/library/cpp/actors/core/scheduler_cookie.h
new file mode 100644
index 0000000000..2c20ca67f3
--- /dev/null
+++ b/library/cpp/actors/core/scheduler_cookie.h
@@ -0,0 +1,78 @@
+#pragma once
+
+#include "defs.h"
+#include <util/generic/noncopyable.h>
+
+namespace NActors {
+ class ISchedulerCookie : TNonCopyable {
+ protected:
+ virtual ~ISchedulerCookie() {
+ }
+
+ public:
+ virtual bool Detach() noexcept = 0;
+ virtual bool DetachEvent() noexcept = 0;
+ virtual bool IsArmed() noexcept = 0;
+
+ static ISchedulerCookie* Make2Way();
+ static ISchedulerCookie* Make3Way();
+ };
+
+ class TSchedulerCookieHolder : TNonCopyable {
+ ISchedulerCookie* Cookie;
+
+ public:
+ TSchedulerCookieHolder()
+ : Cookie(nullptr)
+ {
+ }
+
+ TSchedulerCookieHolder(ISchedulerCookie* x)
+ : Cookie(x)
+ {
+ }
+
+ ~TSchedulerCookieHolder() {
+ Detach();
+ }
+
+ bool operator==(const TSchedulerCookieHolder& x) const noexcept {
+ return (Cookie == x.Cookie);
+ }
+
+ ISchedulerCookie* Get() const {
+ return Cookie;
+ }
+
+ ISchedulerCookie* Release() {
+ ISchedulerCookie* result = Cookie;
+ Cookie = nullptr;
+ return result;
+ }
+
+ void Reset(ISchedulerCookie* cookie) {
+ Detach();
+ Cookie = cookie;
+ }
+
+ bool Detach() noexcept {
+ if (Cookie) {
+ const bool res = Cookie->Detach();
+ Cookie = nullptr;
+ return res;
+ } else {
+ return false;
+ }
+ }
+
+ bool DetachEvent() noexcept {
+ if (Cookie) {
+ const bool res = Cookie->DetachEvent();
+ Cookie = nullptr;
+ return res;
+ } else {
+ return false;
+ }
+ }
+ };
+}
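+// A minimal usage sketch (editorial example, not part of the original change):
+// a 2-way cookie is shared between a scheduled event and its would-be canceller.
+//
+//     ISchedulerCookie* cookie = ISchedulerCookie::Make2Way();
+//     // Detach() returns true only to the side that detaches first; the
+//     // second Detach() returns false and frees the cookie. The scheduler
+//     // delivers the event only when its Detach() returns true, so a prior
+//     // Detach() by the owner effectively cancels delivery.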
diff --git a/library/cpp/actors/core/scheduler_queue.h b/library/cpp/actors/core/scheduler_queue.h
new file mode 100644
index 0000000000..3b8fac28f0
--- /dev/null
+++ b/library/cpp/actors/core/scheduler_queue.h
@@ -0,0 +1,120 @@
+#pragma once
+
+#include <library/cpp/actors/util/queue_chunk.h>
+
+namespace NActors {
+ class IEventHandle;
+ class ISchedulerCookie;
+
+ namespace NSchedulerQueue {
+ struct TEntry {
+ ui64 InstantMicroseconds;
+ IEventHandle* Ev;
+ ISchedulerCookie* Cookie;
+ };
+
+ struct TChunk : TQueueChunkDerived<TEntry, 512, TChunk> {};
+
+ class TReader;
+ class TWriter;
+ class TWriterWithPadding;
+
+ class TReader : ::TNonCopyable {
+ TChunk* ReadFrom;
+ ui32 ReadPosition;
+
+ friend class TWriter;
+
+ public:
+ TReader()
+ : ReadFrom(new TChunk())
+ , ReadPosition(0)
+ {
+ }
+
+ ~TReader() {
+ while (TEntry* x = Pop()) {
+ if (x->Cookie)
+ x->Cookie->Detach();
+ delete x->Ev;
+ }
+ delete ReadFrom;
+ }
+
+ TEntry* Pop() {
+ TChunk* head = ReadFrom;
+ if (ReadPosition != TChunk::EntriesCount) {
+ if (AtomicLoad(&head->Entries[ReadPosition].InstantMicroseconds) != 0)
+ return const_cast<TEntry*>(&head->Entries[ReadPosition++]);
+ else
+ return nullptr;
+ } else if (TChunk* next = AtomicLoad(&head->Next)) {
+ ReadFrom = next;
+ delete head;
+ ReadPosition = 0;
+ return Pop();
+ }
+
+ return nullptr;
+ }
+ };
+
+ class TWriter : ::TNonCopyable {
+ TChunk* WriteTo;
+ ui32 WritePosition;
+
+ public:
+ TWriter()
+ : WriteTo(nullptr)
+ , WritePosition(0)
+ {
+ }
+
+ void Init(const TReader& reader) {
+ WriteTo = reader.ReadFrom;
+ WritePosition = 0;
+ }
+
+ void Push(ui64 instantMicroseconds, IEventHandle* ev, ISchedulerCookie* cookie) {
+ if (Y_UNLIKELY(instantMicroseconds == 0)) {
+ // Protect against Pop() getting stuck forever
+ instantMicroseconds = 1;
+ }
+ if (WritePosition != TChunk::EntriesCount) {
+ volatile TEntry& entry = WriteTo->Entries[WritePosition];
+ entry.Cookie = cookie;
+ entry.Ev = ev;
+ AtomicStore(&entry.InstantMicroseconds, instantMicroseconds);
+ ++WritePosition;
+ } else {
+ TChunk* next = new TChunk();
+ volatile TEntry& entry = next->Entries[0];
+ entry.Cookie = cookie;
+ entry.Ev = ev;
+ entry.InstantMicroseconds = instantMicroseconds;
+ AtomicStore(&WriteTo->Next, next);
+ WriteTo = next;
+ WritePosition = 1;
+ }
+ }
+ };
+
+ class TWriterWithPadding: public TWriter {
+ private:
+ ui8 CacheLinePadding[64 - sizeof(TWriter)];
+
+ void UnusedCacheLinePadding() {
+ Y_UNUSED(CacheLinePadding);
+ }
+ };
+
+ struct TQueueType {
+ TReader Reader;
+ TWriter Writer;
+
+ TQueueType() {
+ Writer.Init(Reader);
+ }
+ };
+ }
+}
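+// A minimal usage sketch (editorial example, not part of the original change):
+// TQueueType wires a single writer to a single reader; InstantMicroseconds
+// doubles as the "entry is published" flag, which is why Push() never stores
+// zero. (nowUs, delayUs and eventHandle below are placeholders.)
+//
+//     NActors::NSchedulerQueue::TQueueType queue;
+//     queue.Writer.Push(nowUs + delayUs, eventHandle, /*cookie*/ nullptr);
+//     while (NActors::NSchedulerQueue::TEntry* e = queue.Reader.Pop()) {
+//         // consume e->Ev / e->Cookie; the pointer is valid until the next Pop()
+//     }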
diff --git a/library/cpp/actors/core/servicemap.h b/library/cpp/actors/core/servicemap.h
new file mode 100644
index 0000000000..d72e50cae5
--- /dev/null
+++ b/library/cpp/actors/core/servicemap.h
@@ -0,0 +1,168 @@
+#pragma once
+
+#include "defs.h"
+
+namespace NActors {
+ // Wait-free single-writer multi-reader hash tree for service mapping purposes.
+ // Fast successive updates of the same key may lead to false negatives; we don't
+ // care, since such usage is already broken at the service-map application level.
+
+ template <typename TKey, typename TValue, typename THash, ui64 BaseSize = 256 * 1024, ui64 ExtCount = 4, ui64 ExtBranching = 4>
+ class TServiceMap : TNonCopyable {
+ struct TEntry : TNonCopyable {
+ ui32 CounterIn;
+ ui32 CounterOut;
+ TKey Key;
+ TValue Value;
+
+ TEntry()
+ : CounterIn(0)
+ , CounterOut(0)
+ , Key()
+ , Value()
+ {
+ }
+ };
+
+ struct TBranch : TNonCopyable {
+ TEntry Entries[ExtCount];
+ TBranch* Branches[ExtBranching];
+
+ TBranch() {
+ Fill(Branches, Branches + ExtBranching, (TBranch*)nullptr);
+ }
+ };
+
+ ui32 Counter;
+ TBranch* Line[BaseSize];
+
+ bool ScanBranch(TBranch* branch, const TKey& key, ui64 hash, TValue& ret) {
+ for (ui32 i = 0; i != ExtCount; ++i) {
+ const TEntry& entry = branch->Entries[i];
+ const ui32 counterIn = AtomicLoad(&entry.CounterIn);
+ if (counterIn != 0 && entry.Key == key) {
+ ret = entry.Value;
+ const ui32 counterOut = AtomicLoad(&entry.CounterOut);
+ if (counterOut == counterIn)
+ return true;
+ }
+ }
+
+ const ui64 hash0 = hash % ExtBranching;
+ if (TBranch* next = AtomicLoad(branch->Branches + hash0))
+ return ScanBranch(next, key, hash / ExtBranching, ret);
+
+ return false;
+ }
+
+ void ScanZeroOld(TBranch* branch, const TKey& key, ui64 hash, TEntry** zeroEntry, TEntry*& oldEntry) {
+ for (ui32 i = 0; i != ExtCount; ++i) {
+ TEntry& entry = branch->Entries[i];
+ if (entry.CounterIn == 0) {
+ if (zeroEntry && !*zeroEntry) {
+ *zeroEntry = &entry;
+ if (oldEntry != nullptr)
+ return;
+ }
+ } else {
+ if (entry.Key == key) {
+ oldEntry = &entry;
+ if (!zeroEntry || *zeroEntry)
+ return;
+ }
+ }
+ }
+
+ const ui64 hash0 = hash % ExtBranching;
+ if (TBranch* next = branch->Branches[hash0]) {
+ ScanZeroOld(next, key, hash / ExtBranching, zeroEntry, oldEntry);
+ } else { // found tail, if zeroEntry requested, but not yet found - insert one
+ if (zeroEntry && !*zeroEntry) {
+ TBranch* next = new TBranch();
+ *zeroEntry = next->Entries;
+ AtomicStore(branch->Branches + hash0, next);
+ }
+ }
+ }
+
+ public:
+ TServiceMap()
+ : Counter(0)
+ {
+ Fill(Line, Line + BaseSize, (TBranch*)nullptr);
+ }
+
+ ~TServiceMap() {
+ for (ui64 i = 0; i < BaseSize; ++i) {
+ delete Line[i];
+ }
+ }
+
+ TValue Find(const TKey& key) {
+ THash hashOp;
+ const ui64 hash = hashOp(key);
+ const ui64 hash0 = hash % BaseSize;
+
+ if (TBranch* branch = AtomicLoad(Line + hash0)) {
+ TValue ret;
+ if (ScanBranch(branch, key, hash / BaseSize, ret))
+ return ret;
+ }
+
+ return TValue();
+ }
+
+ // Returns the previous value on update, or a default-constructed TValue on insert
+ TValue Update(const TKey& key, const TValue& value) {
+ THash hashOp;
+ const ui64 hash = hashOp(key);
+ const ui64 hash0 = hash % BaseSize;
+
+ TEntry* zeroEntry = nullptr;
+ TEntry* oldEntry = nullptr;
+
+ if (TBranch* branch = Line[hash0]) {
+ ScanZeroOld(branch, key, hash / BaseSize, &zeroEntry, oldEntry);
+ } else {
+ TBranch* next = new TBranch();
+ zeroEntry = next->Entries;
+ AtomicStore(Line + hash0, next);
+ }
+
+ // now we got both entries, first - push new one
+ const ui32 counter = AtomicUi32Increment(&Counter);
+ AtomicStore(&zeroEntry->CounterOut, counter);
+ zeroEntry->Key = key;
+ zeroEntry->Value = value;
+ AtomicStore(&zeroEntry->CounterIn, counter);
+
+ if (oldEntry != nullptr) {
+ const TValue ret = oldEntry->Value;
+ AtomicStore<ui32>(&oldEntry->CounterOut, 0);
+ AtomicStore<ui32>(&oldEntry->CounterIn, 0);
+ return ret;
+ } else {
+ return TValue();
+ }
+ }
+
+ bool Erase(const TKey& key) {
+ THash hashOp;
+ const ui64 hash = hashOp(key);
+ const ui64 hash0 = hash % BaseSize;
+
+ TEntry* oldEntry = 0;
+
+ if (TBranch* branch = Line[hash0]) {
+ ScanZeroOld(branch, key, hash / BaseSize, 0, oldEntry);
+ }
+
+ if (oldEntry != 0) {
+ AtomicStore<ui32>(&oldEntry->CounterOut, 0);
+ AtomicStore<ui32>(&oldEntry->CounterIn, 0);
+ return true;
+ } else {
+ return false;
+ }
+ }
+ };
+}
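+// A minimal usage sketch (editorial example, not part of the original change):
+// the single writer publishes mappings, concurrent readers call Find().
+// serviceActorId below is a placeholder.
+//
+//     NActors::TServiceMap<TString, NActors::TActorId, THash<TString>> serviceMap;
+//     serviceMap.Update("my-service", serviceActorId);             // writer thread only
+//     NActors::TActorId resolved = serviceMap.Find("my-service");  // any thread
+//     serviceMap.Erase("my-service");                              // writer thread only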
diff --git a/library/cpp/actors/core/ut/ya.make b/library/cpp/actors/core/ut/ya.make
new file mode 100644
index 0000000000..3ee28d5850
--- /dev/null
+++ b/library/cpp/actors/core/ut/ya.make
@@ -0,0 +1,46 @@
+UNITTEST_FOR(library/cpp/actors/core)
+
+OWNER(
+ alexvru
+ g:kikimr
+)
+
+FORK_SUBTESTS()
+IF (SANITIZER_TYPE)
+ SIZE(LARGE)
+ TIMEOUT(1200)
+ TAG(ya:fat)
+ SPLIT_FACTOR(20)
+ REQUIREMENTS(
+ ram:32
+ )
+ELSE()
+ SIZE(MEDIUM)
+ TIMEOUT(600)
+ REQUIREMENTS(
+ ram:16
+ )
+ENDIF()
+
+
+PEERDIR(
+ library/cpp/actors/interconnect
+ library/cpp/actors/testlib
+)
+
+SRCS(
+ actor_coroutine_ut.cpp
+ actor_ut.cpp
+ actorsystem_ut.cpp
+ ask_ut.cpp
+ balancer_ut.cpp
+ event_pb_payload_ut.cpp
+ event_pb_ut.cpp
+ executor_pool_basic_ut.cpp
+ executor_pool_united_ut.cpp
+ log_ut.cpp
+ memory_tracker_ut.cpp
+ scheduler_actor_ut.cpp
+)
+
+END()
diff --git a/library/cpp/actors/core/worker_context.cpp b/library/cpp/actors/core/worker_context.cpp
new file mode 100644
index 0000000000..ada6c997d4
--- /dev/null
+++ b/library/cpp/actors/core/worker_context.cpp
@@ -0,0 +1,7 @@
+#include "worker_context.h"
+#include "probes.h"
+
+namespace NActors {
+ LWTRACE_USING(ACTORLIB_PROVIDER);
+
+}
diff --git a/library/cpp/actors/core/worker_context.h b/library/cpp/actors/core/worker_context.h
new file mode 100644
index 0000000000..b4c37a7629
--- /dev/null
+++ b/library/cpp/actors/core/worker_context.h
@@ -0,0 +1,175 @@
+#pragma once
+
+#include "defs.h"
+
+#include "actorsystem.h"
+#include "event.h"
+#include "lease.h"
+#include "mailbox.h"
+#include "mon_stats.h"
+
+#include <library/cpp/actors/util/datetime.h>
+#include <library/cpp/actors/util/intrinsics.h>
+#include <library/cpp/actors/util/thread.h>
+
+#include <library/cpp/lwtrace/shuttle.h>
+
+namespace NActors {
+ struct TWorkerContext {
+ const TWorkerId WorkerId;
+ const TCpuId CpuId;
+ TLease Lease;
+ IExecutorPool* Executor = nullptr;
+ TMailboxTable* MailboxTable = nullptr;
+ ui64 TimePerMailboxTs = 0;
+ ui32 EventsPerMailbox = 0;
+ ui64 SoftDeadlineTs = ui64(-1);
+ TExecutorThreadStats* Stats = &WorkerStats; // pool stats
+ TExecutorThreadStats WorkerStats;
+ TPoolId PoolId = MaxPools;
+ mutable NLWTrace::TOrbit Orbit;
+
+ TWorkerContext(TWorkerId workerId, TCpuId cpuId, size_t activityVecSize)
+ : WorkerId(workerId)
+ , CpuId(cpuId)
+ , Lease(WorkerId, NeverExpire)
+ , WorkerStats(activityVecSize)
+ {}
+
+#ifdef ACTORSLIB_COLLECT_EXEC_STATS
+ void GetCurrentStats(TExecutorThreadStats& statsCopy) const {
+ statsCopy = TExecutorThreadStats();
+ statsCopy.Aggregate(*Stats);
+ }
+
+ void AddElapsedCycles(ui32 activityType, i64 elapsed) {
+ Y_VERIFY_DEBUG(activityType < Stats->MaxActivityType());
+ RelaxedStore(&Stats->ElapsedTicks, RelaxedLoad(&Stats->ElapsedTicks) + elapsed);
+ RelaxedStore(&Stats->ElapsedTicksByActivity[activityType], RelaxedLoad(&Stats->ElapsedTicksByActivity[activityType]) + elapsed);
+ }
+
+ void AddParkedCycles(i64 elapsed) {
+ RelaxedStore(&Stats->ParkedTicks, RelaxedLoad(&Stats->ParkedTicks) + elapsed);
+ }
+
+ void AddBlockedCycles(i64 elapsed) {
+ RelaxedStore(&Stats->BlockedTicks, RelaxedLoad(&Stats->BlockedTicks) + elapsed);
+ }
+
+ void IncrementSentEvents() {
+ RelaxedStore(&Stats->SentEvents, RelaxedLoad(&Stats->SentEvents) + 1);
+ }
+
+ void IncrementPreemptedEvents() {
+ RelaxedStore(&Stats->PreemptedEvents, RelaxedLoad(&Stats->PreemptedEvents) + 1);
+ }
+
+ void DecrementActorsAliveByActivity(ui32 activityType) {
+ if (activityType >= Stats->MaxActivityType()) {
+ activityType = 0;
+ }
+ RelaxedStore(&Stats->ActorsAliveByActivity[activityType], Stats->ActorsAliveByActivity[activityType] - 1);
+ }
+
+ inline void IncrementNonDeliveredEvents() {
+ RelaxedStore(&Stats->NonDeliveredEvents, RelaxedLoad(&Stats->NonDeliveredEvents) + 1);
+ }
+
+ inline void IncrementMailboxPushedOutBySoftPreemption() {
+ RelaxedStore(&Stats->MailboxPushedOutBySoftPreemption, RelaxedLoad(&Stats->MailboxPushedOutBySoftPreemption) + 1);
+ }
+
+ inline void IncrementMailboxPushedOutByTime() {
+ RelaxedStore(&Stats->MailboxPushedOutByTime, RelaxedLoad(&Stats->MailboxPushedOutByTime) + 1);
+ }
+
+ inline void IncrementMailboxPushedOutByEventCount() {
+ RelaxedStore(&Stats->MailboxPushedOutByEventCount, RelaxedLoad(&Stats->MailboxPushedOutByEventCount) + 1);
+ }
+
+ inline void IncrementEmptyMailboxActivation() {
+ RelaxedStore(&Stats->EmptyMailboxActivation, RelaxedLoad(&Stats->EmptyMailboxActivation) + 1);
+ }
+
+ double AddActivationStats(i64 scheduleTs, i64 deliveredTs) {
+ i64 ts = deliveredTs > scheduleTs ? deliveredTs - scheduleTs : 0;
+ double usec = NHPTimer::GetSeconds(ts) * 1000000.0;
+ Stats->ActivationTimeHistogram.Add(usec);
+ return usec;
+ }
+
+ ui64 AddEventDeliveryStats(i64 sentTs, i64 deliveredTs) {
+ ui64 usecDeliv = deliveredTs > sentTs ? NHPTimer::GetSeconds(deliveredTs - sentTs) * 1000000 : 0;
+ Stats->EventDeliveryTimeHistogram.Add(usecDeliv);
+ return usecDeliv;
+ }
+
+ i64 AddEventProcessingStats(i64 deliveredTs, i64 processedTs, ui32 activityType, ui64 scheduled) {
+ i64 elapsed = processedTs - deliveredTs;
+ ui64 usecElapsed = NHPTimer::GetSeconds(elapsed) * 1000000;
+ activityType = (activityType >= Stats->MaxActivityType()) ? 0 : activityType;
+ Stats->EventProcessingCountHistogram.Add(usecElapsed);
+ Stats->EventProcessingTimeHistogram.Add(usecElapsed, elapsed);
+ RelaxedStore(&Stats->ReceivedEvents, RelaxedLoad(&Stats->ReceivedEvents) + 1);
+ RelaxedStore(&Stats->ReceivedEventsByActivity[activityType], RelaxedLoad(&Stats->ReceivedEventsByActivity[activityType]) + 1);
+ RelaxedStore(&Stats->ScheduledEventsByActivity[activityType], RelaxedLoad(&Stats->ScheduledEventsByActivity[activityType]) + scheduled);
+ AddElapsedCycles(activityType, elapsed);
+ return elapsed;
+ }
+
+ void UpdateActorsStats(size_t dyingActorsCnt) {
+ if (dyingActorsCnt) {
+ AtomicAdd(Executor->DestroyedActors, dyingActorsCnt);
+ }
+ RelaxedStore(&Stats->PoolDestroyedActors, (ui64)RelaxedLoad(&Executor->DestroyedActors));
+ RelaxedStore(&Stats->PoolActorRegistrations, (ui64)RelaxedLoad(&Executor->ActorRegistrations));
+ RelaxedStore(&Stats->PoolAllocatedMailboxes, MailboxTable->GetAllocatedMailboxCount());
+ }
+
+ void UpdateThreadTime() {
+ RelaxedStore(&WorkerStats.CpuNs, ThreadCPUTime() * 1000);
+ }
+#else
+ void GetCurrentStats(TExecutorThreadStats&) const {}
+ inline void AddElapsedCycles(ui32, i64) {}
+ inline void AddParkedCycles(i64) {}
+ inline void AddBlockedCycles(i64) {}
+ inline void IncrementSentEvents() {}
+ inline void IncrementPreemptedEvents() {}
+ inline void IncrementMailboxPushedOutBySoftPreemption() {}
+ inline void IncrementMailboxPushedOutByTime() {}
+ inline void IncrementMailboxPushedOutByEventCount() {}
+ inline void IncrementEmptyMailboxActivation() {}
+ void DecrementActorsAliveByActivity(ui32) {}
+ void IncrementNonDeliveredEvents() {}
+ double AddActivationStats(i64, i64) { return 0; }
+ ui64 AddEventDeliveryStats(i64, i64) { return 0; }
+ i64 AddEventProcessingStats(i64, i64, ui32, ui64) { return 0; }
+ void UpdateActorsStats(size_t) {}
+ void UpdateThreadTime() {}
+#endif
+
+ void Switch(IExecutorPool* executor,
+ TMailboxTable* mailboxTable,
+ ui64 timePerMailboxTs,
+ ui32 eventsPerMailbox,
+ ui64 softDeadlineTs,
+ TExecutorThreadStats* stats)
+ {
+ Executor = executor;
+ MailboxTable = mailboxTable;
+ TimePerMailboxTs = timePerMailboxTs;
+ EventsPerMailbox = eventsPerMailbox;
+ SoftDeadlineTs = softDeadlineTs;
+ Stats = stats;
+ PoolId = Executor ? Executor->PoolId : MaxPools;
+ }
+
+ void SwitchToIdle() {
+ Executor = nullptr;
+ MailboxTable = nullptr;
+ //Stats = &WorkerStats; // TODO: in actorsystem 2.0 idle stats cannot be related to specific pool
+ PoolId = MaxPools;
+ }
+ };
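+
+ // Illustrative sketch (not part of the original sources): an executor thread
+ // owns a single TWorkerContext and re-points it at a pool before draining
+ // mailboxes; `pool`, `poolStats` and the timing arguments are placeholders.
+ //
+ // TWorkerContext ctx(workerId, cpuId, activityVecSize);
+ // ctx.Switch(pool, mailboxTable, timePerMailboxTs,
+ // eventsPerMailbox, softDeadlineTs, poolStats);
+ // ... run events, recording ctx.AddElapsedCycles(activity, cycles) ...
+ // ctx.SwitchToIdle(); // park between activations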
+}
diff --git a/library/cpp/actors/core/ya.make b/library/cpp/actors/core/ya.make
new file mode 100644
index 0000000000..880a9d00db
--- /dev/null
+++ b/library/cpp/actors/core/ya.make
@@ -0,0 +1,123 @@
+LIBRARY()
+
+OWNER(
+ ddoarn
+ g:kikimr
+)
+
+NO_WSHADOW()
+
+IF (PROFILE_MEMORY_ALLOCATIONS)
+ CFLAGS(-DPROFILE_MEMORY_ALLOCATIONS)
+ENDIF()
+
+IF (ALLOCATOR == "B" OR ALLOCATOR == "BS" OR ALLOCATOR == "C")
+ CXXFLAGS(-DBALLOC)
+ PEERDIR(
+ library/cpp/balloc/optional
+ )
+ENDIF()
+
+SRCS(
+ actor_bootstrapped.h
+ actor_coroutine.cpp
+ actor_coroutine.h
+ actor.cpp
+ actor.h
+ actorid.cpp
+ actorid.h
+ actorsystem.cpp
+ actorsystem.h
+ ask.cpp
+ ask.h
+ balancer.h
+ balancer.cpp
+ buffer.cpp
+ buffer.h
+ callstack.cpp
+ callstack.h
+ config.h
+ cpu_manager.cpp
+ cpu_manager.h
+ cpu_state.h
+ defs.h
+ event.cpp
+ event.h
+ event_load.h
+ event_local.h
+ event_pb.cpp
+ event_pb.h
+ events.h
+ events_undelivered.cpp
+ executelater.h
+ executor_pool_base.cpp
+ executor_pool_base.h
+ executor_pool_basic.cpp
+ executor_pool_basic.h
+ executor_pool_io.cpp
+ executor_pool_io.h
+ executor_pool_united.cpp
+ executor_pool_united.h
+ executor_thread.cpp
+ executor_thread.h
+ hfunc.h
+ interconnect.cpp
+ interconnect.h
+ invoke.h
+ io_dispatcher.cpp
+ io_dispatcher.h
+ lease.h
+ log.cpp
+ log.h
+ log_settings.cpp
+ log_settings.h
+ mailbox.cpp
+ mailbox.h
+ mailbox_queue_revolving.h
+ mailbox_queue_simple.h
+ memory_track.cpp
+ memory_track.h
+ memory_tracker.cpp
+ memory_tracker.h
+ mon.h
+ mon_stats.h
+ monotonic.cpp
+ monotonic.h
+ worker_context.cpp
+ worker_context.h
+ probes.cpp
+ probes.h
+ process_stats.cpp
+ process_stats.h
+ scheduler_actor.cpp
+ scheduler_actor.h
+ scheduler_basic.cpp
+ scheduler_basic.h
+ scheduler_cookie.cpp
+ scheduler_cookie.h
+ scheduler_queue.h
+ servicemap.h
+)
+
+GENERATE_ENUM_SERIALIZATION(defs.h)
+GENERATE_ENUM_SERIALIZATION(actor.h)
+
+PEERDIR(
+ library/cpp/actors/memory_log
+ library/cpp/actors/prof
+ library/cpp/actors/protos
+ library/cpp/actors/util
+ library/cpp/execprofile
+ library/cpp/json/writer
+ library/cpp/logger
+ library/cpp/lwtrace
+ library/cpp/monlib/dynamic_counters
+ library/cpp/svnversion
+ library/cpp/threading/future
+)
+
+END()
+
+RECURSE_FOR_TESTS(
+ ut
+)
diff --git a/library/cpp/actors/dnscachelib/dnscache.cpp b/library/cpp/actors/dnscachelib/dnscache.cpp
new file mode 100644
index 0000000000..649339ddb2
--- /dev/null
+++ b/library/cpp/actors/dnscachelib/dnscache.cpp
@@ -0,0 +1,445 @@
+#include "dnscache.h"
+#include "probes.h"
+#include "timekeeper.h"
+
+#include <contrib/libs/c-ares/ares.h>
+#include <util/system/guard.h>
+#include <util/datetime/systime.h>
+
+const TDnsCache::THost TDnsCache::NullHost;
+
+LWTRACE_USING(DNSCACHELIB_PROVIDER);
+
+static_assert(sizeof(ares_channel) == sizeof(void*), "expect sizeof(ares_channel) == sizeof(void *)");
+
+TDnsCache::TDnsCache(bool allowIpv4, bool allowIpv6, time_t lifetime, time_t neg, ui32 timeout)
+ : EntryLifetime(lifetime)
+ , NegativeLifetime(neg)
+ , Timeout(TDuration::MicroSeconds(timeout))
+ , AllowIpV4(allowIpv4)
+ , AllowIpV6(allowIpv6)
+ , ACacheHits(0)
+ , ACacheMisses(0)
+ , PtrCacheHits(0)
+ , PtrCacheMisses(0)
+{
+#ifdef _win_
+ if (ares_library_init(ARES_LIB_INIT_WIN32) != ARES_SUCCESS) {
+ LWPROBE(AresInitFailed);
+ ythrow yexception() << "ares_init() failed";
+ }
+#endif
+
+ ares_channel chan;
+
+ if (ares_init(&chan) != ARES_SUCCESS) {
+ LWPROBE(AresInitFailed);
+ ythrow yexception() << "ares_init() failed";
+ }
+ Channel = chan;
+ LWPROBE(Created);
+}
+
+TDnsCache::~TDnsCache(void) {
+ ares_channel chan = static_cast<ares_channel>(Channel);
+
+ ares_cancel(chan);
+ ares_destroy(chan);
+ LWPROBE(Destroyed);
+
+#ifdef _win_
+ ares_library_cleanup();
+#endif
+}
+
+TString TDnsCache::GetHostByAddr(const NAddr::IRemoteAddr& addr) {
+ in6_addr key;
+
+ if (addr.Addr()->sa_family == AF_INET6) {
+ const struct sockaddr_in6* s6 = (const struct sockaddr_in6*)(addr.Addr());
+ memcpy(&key, &s6->sin6_addr, sizeof(s6->sin6_addr));
+ } else if (addr.Addr()->sa_family == AF_INET) {
+ const struct sockaddr_in* s4 = (const struct sockaddr_in*)(addr.Addr());
+ memset(&key, 0, sizeof(key));
+ memcpy(&key, &s4->sin_addr, sizeof(s4->sin_addr));
+ } else {
+ return "";
+ }
+ const TAddr& host = ResolveAddr(key, addr.Addr()->sa_family);
+
+ return host.Hostname;
+}
+
+TIpHost TDnsCache::Get(const TString& hostname) {
+ if (!AllowIpV4)
+ return TIpHost(-1);
+
+ const THost& addr = Resolve(hostname, AF_INET);
+
+ TGuard<TMutex> lock(CacheMtx);
+ if (addr.AddrsV4.empty()) {
+ return TIpHost(-1);
+ }
+ return addr.AddrsV4.front();
+}
+
+NAddr::IRemoteAddrPtr TDnsCache::GetAddr(
+ const TString& hostname,
+ int family,
+ TIpPort port,
+ bool cacheOnly) {
+ if (family != AF_INET && AllowIpV6) {
+ const THost& addr = Resolve(hostname, AF_INET6, cacheOnly);
+
+ TGuard<TMutex> lock(CacheMtx);
+ if (!addr.AddrsV6.empty()) {
+ struct sockaddr_in6 sin6;
+ Zero(sin6);
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_addr = addr.AddrsV6.front();
+ sin6.sin6_port = HostToInet(port);
+
+ return MakeHolder<NAddr::TIPv6Addr>(sin6);
+ }
+ }
+
+ if (family != AF_INET6 && AllowIpV4) {
+ const THost& addr = Resolve(hostname, AF_INET, cacheOnly);
+
+ TGuard<TMutex> lock(CacheMtx);
+ if (!addr.AddrsV4.empty()) {
+ return MakeHolder<NAddr::TIPv4Addr>(TIpAddress(addr.AddrsV4.front(), port));
+ }
+ }
+
+ LWPROBE(FamilyMismatch, family, AllowIpV4, AllowIpV6);
+ return nullptr;
+}
+
+void TDnsCache::GetAllAddresses(
+ const TString& hostname,
+ TVector<NAddr::IRemoteAddrPtr>& addrs) {
+ if (AllowIpV4) {
+ const THost& addr4 = Resolve(hostname, AF_INET);
+
+ TGuard<TMutex> lock(CacheMtx);
+ for (size_t i = 0; i < addr4.AddrsV4.size(); i++) {
+ addrs.push_back(MakeHolder<NAddr::TIPv4Addr>(TIpAddress(addr4.AddrsV4[i], 0)));
+ }
+ }
+
+ if (AllowIpV6) {
+ const THost& addr6 = Resolve(hostname, AF_INET6);
+
+ struct sockaddr_in6 sin6;
+ Zero(sin6);
+ sin6.sin6_family = AF_INET6;
+
+ TGuard<TMutex> lock(CacheMtx);
+ for (size_t i = 0; i < addr6.AddrsV6.size(); i++) {
+ sin6.sin6_addr = addr6.AddrsV6[i];
+
+ addrs.push_back(MakeHolder<NAddr::TIPv6Addr>(sin6));
+ }
+ }
+}
+
+void TDnsCache::GetStats(ui64& a_cache_hits, ui64& a_cache_misses,
+ ui64& ptr_cache_hits, ui64& ptr_cache_misses) {
+ TGuard<TMutex> lock(CacheMtx);
+
+ a_cache_hits = ACacheHits;
+ a_cache_misses = ACacheMisses;
+ ptr_cache_hits = PtrCacheHits;
+ ptr_cache_misses = PtrCacheMisses;
+}
+
+bool TDnsCache::THost::IsStale(int family, const TDnsCache* ctx) const noexcept {
+ time_t resolved = family == AF_INET ? ResolvedV4 : ResolvedV6;
+ time_t notfound = family == AF_INET ? NotFoundV4 : NotFoundV6;
+
+ if (TTimeKeeper::GetTime() - resolved < ctx->EntryLifetime)
+ return false;
+
+ if (TTimeKeeper::GetTime() - notfound < ctx->NegativeLifetime)
+ return false;
+
+ return true;
+}
+
+const TDnsCache::THost&
+TDnsCache::Resolve(const TString& hostname, int family, bool cacheOnly) {
+ if (!ValidateHName(hostname)) {
+ LWPROBE(ResolveNullHost, hostname, family);
+ return NullHost;
+ }
+
+ THostCache::iterator p;
+
+ Y_ASSERT(family == AF_INET || family == AF_INET6);
+
+ {
+ TGuard<TMutex> lock(CacheMtx);
+ p = HostCache.find(hostname);
+ if (p != HostCache.end()) {
+ if (!p->second.IsStale(family, this)) {
+ /* Recently resolved, just return cached value */
+ ACacheHits += 1;
+ THost& host = p->second;
+ LWPROBE(ResolveFromCache, hostname, family, host.AddrsV4ToString(), host.AddrsV6ToString(), ACacheHits);
+ return host;
+ } else {
+ LWPROBE(ResolveCacheTimeout, hostname);
+ }
+ } else {
+ /* Never resolved, create cache entry */
+ LWPROBE(ResolveCacheNew, hostname);
+ p = HostCache.insert(std::make_pair(hostname, THost())).first;
+ }
+ ACacheMisses += 1;
+ }
+
+ if (cacheOnly)
+ return NullHost;
+
+ TAtomic& inprogress = (family == AF_INET ? p->second.InProgressV4 : p->second.InProgressV6);
+
+ {
+ /* This order only! CacheMtx must always be taken AFTER AresMtx,
+ * because later in ares_process it can only be done this way.
+ * Reversing the lock order will cause a deadlock at unfortunate moments.
+ */
+ TGuard<TMutex> areslock(AresMtx);
+ TGuard<TMutex> cachelock(CacheMtx);
+
+ if (!inprogress) {
+ ares_channel chan = static_cast<ares_channel>(Channel);
+ TGHBNContext* ctx = new TGHBNContext();
+ ctx->Owner = this;
+ ctx->Hostname = hostname;
+ ctx->Family = family;
+
+ AtomicSet(inprogress, 1);
+ ares_gethostbyname(chan, hostname.c_str(), family,
+ &TDnsCache::GHBNCallback, ctx);
+ }
+ }
+
+ WaitTask(inprogress);
+
+ LWPROBE(ResolveDone, hostname, family, p->second.AddrsV4ToString(), p->second.AddrsV6ToString());
+ return p->second;
+}
+
+bool TDnsCache::ValidateHName(const TString& name) const noexcept {
+ return !name.empty();
+}
+
+const TDnsCache::TAddr& TDnsCache::ResolveAddr(const in6_addr& addr, int family) {
+ TAddrCache::iterator p;
+
+ {
+ TGuard<TMutex> lock(CacheMtx);
+ p = AddrCache.find(addr);
+ if (p != AddrCache.end()) {
+ if (TTimeKeeper::GetTime() - p->second.Resolved < EntryLifetime || TTimeKeeper::GetTime() - p->second.NotFound < NegativeLifetime) {
+ /* Recently resolved, just return cached value */
+ PtrCacheHits += 1;
+ return p->second;
+ }
+ } else {
+ /* Never resolved, create cache entry */
+
+ p = AddrCache.insert(std::make_pair(addr, TAddr())).first;
+ }
+ PtrCacheMisses += 1;
+ }
+
+ {
+ /* This order only! CacheMtx must always be taken AFTER AresMtx,
+ * because later in ares_process it can only be done this way.
+ * Reversing the lock order will cause a deadlock at unfortunate moments.
+ */
+ TGuard<TMutex> areslock(AresMtx);
+ TGuard<TMutex> cachelock(CacheMtx);
+
+ if (!p->second.InProgress) {
+ ares_channel chan = static_cast<ares_channel>(Channel);
+ TGHBAContext* ctx = new TGHBAContext();
+ ctx->Owner = this;
+ ctx->Addr = addr;
+
+ AtomicSet(p->second.InProgress, 1);
+ ares_gethostbyaddr(chan, &addr,
+ family == AF_INET ? sizeof(in_addr) : sizeof(in6_addr),
+ family, &TDnsCache::GHBACallback, ctx);
+ }
+ }
+
+ WaitTask(p->second.InProgress);
+
+ return p->second;
+}
+
+void TDnsCache::WaitTask(TAtomic& flag) {
+ const TInstant start = TInstant(TTimeKeeper::GetTimeval());
+
+ while (AtomicGet(flag)) {
+ ares_channel chan = static_cast<ares_channel>(Channel);
+
+ struct pollfd pfd[ARES_GETSOCK_MAXNUM];
+ int nfds;
+ ares_socket_t socks[ARES_GETSOCK_MAXNUM];
+ int bits;
+
+ {
+ TGuard<TMutex> lock(AresMtx);
+ bits = ares_getsock(chan, socks, ARES_GETSOCK_MAXNUM);
+ if (bits == 0) {
+ /* other thread did our job */
+ continue;
+ }
+ }
+
+ for (nfds = 0; nfds < ARES_GETSOCK_MAXNUM; nfds++) {
+ pfd[nfds].events = 0;
+ pfd[nfds].revents = 0;
+ if (ARES_GETSOCK_READABLE(bits, nfds)) {
+ pfd[nfds].fd = socks[nfds];
+ pfd[nfds].events |= POLLRDNORM | POLLIN;
+ }
+ if (ARES_GETSOCK_WRITABLE(bits, nfds)) {
+ pfd[nfds].fd = socks[nfds];
+ pfd[nfds].events |= POLLWRNORM | POLLOUT;
+ }
+ if (pfd[nfds].events == 0) {
+ break;
+ }
+ }
+
+ Y_ASSERT(nfds != 0);
+
+ const TDuration left = TInstant(TTimeKeeper::GetTimeval()) - start;
+ const TDuration wait = Max(Timeout - left, TDuration::Zero());
+
+ int rv = poll(pfd, nfds, wait.MilliSeconds());
+
+ if (rv == -1) {
+ if (errno == EINTR) {
+ continue;
+ }
+ /* Unknown error in poll(), can't recover. Just pretend there was no reply */
+ rv = 0;
+ }
+
+ if (rv == 0) {
+ /* poll() timed out */
+ TGuard<TMutex> lock(AresMtx);
+ ares_process_fd(chan, ARES_SOCKET_BAD, ARES_SOCKET_BAD);
+ } else {
+ for (int i = 0; i < nfds; i++) {
+ if (pfd[i].revents == 0) {
+ continue;
+ }
+ TGuard<TMutex> lock(AresMtx);
+ ares_process_fd(chan,
+ pfd[i].revents & (POLLRDNORM | POLLIN)
+ ? pfd[i].fd
+ : ARES_SOCKET_BAD,
+ pfd[i].revents & (POLLWRNORM | POLLOUT)
+ ? pfd[i].fd
+ : ARES_SOCKET_BAD);
+ }
+ }
+
+ if (start + Timeout <= TInstant(TTimeKeeper::GetTimeval())) {
+ break;
+ }
+ }
+}
+
+void TDnsCache::GHBNCallback(void* arg, int status, int, struct hostent* info) {
+ THolder<TGHBNContext> ctx(static_cast<TGHBNContext*>(arg));
+ TGuard<TMutex> lock(ctx->Owner->CacheMtx);
+ THostCache::iterator p = ctx->Owner->HostCache.find(ctx->Hostname);
+
+ Y_ASSERT(p != ctx->Owner->HostCache.end());
+
+ time_t& resolved = (ctx->Family == AF_INET ? p->second.ResolvedV4 : p->second.ResolvedV6);
+ time_t& notfound = (ctx->Family == AF_INET ? p->second.NotFoundV4 : p->second.NotFoundV6);
+ TAtomic& inprogress = (ctx->Family == AF_INET ? p->second.InProgressV4 : p->second.InProgressV6);
+
+ if (status == ARES_SUCCESS) {
+ if (info->h_addrtype == AF_INET) {
+ p->second.AddrsV4.clear();
+ for (int i = 0; info->h_addr_list[i] != nullptr; i++) {
+ p->second.AddrsV4.push_back(*(TIpHost*)(info->h_addr_list[i]));
+ }
+ /* It is possible to ask ares for IPv6 and get IPv4 addrs instead,
+ so take care to set the V4 timers anyway.
+ */
+ p->second.ResolvedV4 = TTimeKeeper::GetTime();
+ p->second.NotFoundV4 = 0;
+ AtomicSet(p->second.InProgressV4, 0);
+ } else if (info->h_addrtype == AF_INET6) {
+ p->second.AddrsV6.clear();
+ for (int i = 0; info->h_addr_list[i] != nullptr; i++) {
+ p->second.AddrsV6.push_back(*(struct in6_addr*)(info->h_addr_list[i]));
+ }
+ } else {
+ Y_FAIL("unknown address type in ares callback");
+ }
+ resolved = TTimeKeeper::GetTime();
+ notfound = 0;
+ } else {
+ notfound = TTimeKeeper::GetTime();
+ resolved = 0;
+ }
+ AtomicSet(inprogress, 0);
+}
+
+void TDnsCache::GHBACallback(void* arg, int status, int, struct hostent* info) {
+ THolder<TGHBAContext> ctx(static_cast<TGHBAContext*>(arg));
+ TGuard<TMutex> lock(ctx->Owner->CacheMtx);
+ TAddrCache::iterator p = ctx->Owner->AddrCache.find(ctx->Addr);
+
+ Y_ASSERT(p != ctx->Owner->AddrCache.end());
+
+ if (status == ARES_SUCCESS) {
+ p->second.Hostname = info->h_name;
+ p->second.Resolved = TTimeKeeper::GetTime();
+ p->second.NotFound = 0;
+ } else {
+ p->second.NotFound = TTimeKeeper::GetTime();
+ p->second.Resolved = 0;
+ }
+ AtomicSet(p->second.InProgress, 0);
+}
+
+TString TDnsCache::THost::AddrsV4ToString() const {
+ TStringStream ss;
+ bool first = true;
+ for (TIpHost addr : AddrsV4) {
+ ss << (first ? "" : " ") << IpToString(addr);
+ first = false;
+ }
+ return ss.Str();
+}
+
+TString TDnsCache::THost::AddrsV6ToString() const {
+ TStringStream ss;
+ bool first = true;
+ for (in6_addr addr : AddrsV6) {
+ struct sockaddr_in6 sin6;
+ Zero(sin6);
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_addr = addr;
+
+ NAddr::TIPv6Addr addr6(sin6);
+ ss << (first ? "" : " ") << NAddr::PrintHost(addr6);
+ first = false;
+ }
+ return ss.Str();
+}
+
+TDnsCache::TAresLibInit TDnsCache::InitAresLib;
diff --git a/library/cpp/actors/dnscachelib/dnscache.h b/library/cpp/actors/dnscachelib/dnscache.h
new file mode 100644
index 0000000000..3313a251a1
--- /dev/null
+++ b/library/cpp/actors/dnscachelib/dnscache.h
@@ -0,0 +1,148 @@
+#pragma once
+
+#include <contrib/libs/c-ares/ares.h>
+#include <util/generic/map.h>
+#include <util/generic/vector.h>
+#include <util/network/address.h>
+#include <util/system/mutex.h>
+#include <util/datetime/base.h>
+
+/** Asynchronous DNS resolver.
+ *
+ * This is NOT a general-purpose resolver! It is designed around very specific assumptions:
+ * 1) there is a relatively small and rarely changing set of resolved names (e.g. a server pool in a cluster)
+ * 2) these names are supposed to have addresses; the absence of an A record counts as a DNS error
+ * 3) most of the time IP addresses do not change
+ * 4) it is OK to return an old IP address when the DNS server does not respond in time
+ */
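+
+/* A minimal usage sketch (illustrative, not part of the original sources;
+ * the host name and port are placeholders):
+ *
+ * TDnsCache cache; // defaults: v4+v6 allowed, 1800 s positive TTL
+ * NAddr::IRemoteAddrPtr addr = cache.GetAddr("db.example.net", AF_UNSPEC, 443);
+ * if (addr) {
+ * // connect using *addr; under a DNS outage a stale address may be returned
+ * }
+ */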
+
+class TDnsCache {
+public:
+ TDnsCache(bool allowIpv4 = true, bool allowIpv6 = true, time_t entry_lifetime = 1800, time_t neg_lifetime = 1, ui32 request_timeout = 500000);
+ ~TDnsCache();
+
+ TString GetHostByAddr(const NAddr::IRemoteAddr&);
+
+ // ip in network byte order
+ TIpHost Get(const TString& host);
+
+ /* use with AF_INET, AF_INET6 or AF_UNSPEC */
+ NAddr::IRemoteAddrPtr GetAddr(const TString& host,
+ int family,
+ TIpPort port = 0,
+ bool cacheOnly = false);
+
+ void GetAllAddresses(const TString& host, TVector<NAddr::IRemoteAddrPtr>&);
+
+ void GetStats(ui64& a_cache_hits, ui64& a_cache_misses,
+ ui64& ptr_cache_hits, ui64& ptr_cache_misses);
+
+protected:
+ bool ValidateHName(const TString& host) const noexcept;
+
+private:
+ struct TGHBNContext {
+ TDnsCache* Owner;
+ TString Hostname;
+ int Family;
+ };
+
+ struct TGHBAContext {
+ TDnsCache* Owner;
+ in6_addr Addr;
+ };
+
+ struct THost {
+ THost() noexcept {
+ }
+
+ TVector<TIpHost> AddrsV4;
+ time_t ResolvedV4 = 0;
+ time_t NotFoundV4 = 0;
+ TAtomic InProgressV4 = 0;
+
+ TVector<in6_addr> AddrsV6;
+ time_t ResolvedV6 = 0;
+ time_t NotFoundV6 = 0;
+ TAtomic InProgressV6 = 0;
+
+ TString AddrsV4ToString() const;
+ TString AddrsV6ToString() const;
+
+ bool IsStale(int family, const TDnsCache* ctx) const noexcept;
+ };
+
+ typedef TMap<TString, THost> THostCache;
+
+ struct TAddr {
+ TString Hostname;
+ time_t Resolved = 0;
+ time_t NotFound = 0;
+ TAtomic InProgress = 0;
+ };
+ /* IRemoteAddr is annoyingly hard to use, so we use in6_addr as the key
+ * and put v4 addrs in it.
+ */
+ struct TAddrCmp {
+ bool operator()(const in6_addr& left, const in6_addr& right) const {
+ for (size_t i = 0; i < sizeof(left); i++) {
+ if (left.s6_addr[i] < right.s6_addr[i]) {
+ return true;
+ } else if (left.s6_addr[i] > right.s6_addr[i]) {
+ return false;
+ }
+ }
+ // equal
+ return false;
+ }
+ };
+ typedef TMap<in6_addr, TAddr, TAddrCmp> TAddrCache;
+
+ const THost& Resolve(const TString&, int family, bool cacheOnly = false);
+
+ const TAddr& ResolveAddr(const in6_addr&, int family);
+
+ void WaitTask(TAtomic&);
+
+ static void GHBNCallback(void* arg, int status, int timeouts,
+ struct hostent* info);
+
+ static void GHBACallback(void* arg, int status, int timeouts,
+ struct hostent* info);
+
+ const time_t EntryLifetime;
+ const time_t NegativeLifetime;
+ const TDuration Timeout;
+ const bool AllowIpV4;
+ const bool AllowIpV6;
+
+ TMutex CacheMtx;
+ THostCache HostCache;
+ TAddrCache AddrCache;
+ ui64 ACacheHits;
+ ui64 ACacheMisses;
+ ui64 PtrCacheHits;
+ ui64 PtrCacheMisses;
+
+ const static THost NullHost;
+
+ TMutex AresMtx;
+ void* Channel;
+
+ struct TAresLibInit {
+ TAresLibInit() {
+#ifdef _win_
+ const auto res = ares_library_init(ARES_LIB_INIT_ALL);
+ Y_VERIFY(res == 0);
+#endif
+ }
+
+ ~TAresLibInit() {
+#ifdef _win_
+ ares_library_cleanup();
+#endif
+ }
+ };
+
+ static TAresLibInit InitAresLib;
+};
diff --git a/library/cpp/actors/dnscachelib/probes.cpp b/library/cpp/actors/dnscachelib/probes.cpp
new file mode 100644
index 0000000000..07734ab20f
--- /dev/null
+++ b/library/cpp/actors/dnscachelib/probes.cpp
@@ -0,0 +1,3 @@
+#include "probes.h"
+
+LWTRACE_DEFINE_PROVIDER(DNSCACHELIB_PROVIDER)
diff --git a/library/cpp/actors/dnscachelib/probes.h b/library/cpp/actors/dnscachelib/probes.h
new file mode 100644
index 0000000000..313b7b8712
--- /dev/null
+++ b/library/cpp/actors/dnscachelib/probes.h
@@ -0,0 +1,35 @@
+#pragma once
+
+#include <library/cpp/lwtrace/all.h>
+
+#define DNSCACHELIB_PROVIDER(PROBE, EVENT, GROUPS, TYPES, NAMES) \
+ PROBE(Created, GROUPS(), TYPES(), NAMES()) \
+ PROBE(Destroyed, GROUPS(), TYPES(), NAMES()) \
+ PROBE(AresInitFailed, GROUPS(), TYPES(), NAMES()) \
+ PROBE(FamilyMismatch, \
+ GROUPS(), \
+ TYPES(int, bool, bool), \
+ NAMES("family", "allowIpV4", "allowIpV6")) \
+ PROBE(ResolveNullHost, \
+ GROUPS(), \
+ TYPES(TString, int), \
+ NAMES("hostname", "family")) \
+ PROBE(ResolveFromCache, \
+ GROUPS(), \
+ TYPES(TString, int, TString, TString, ui64), \
+ NAMES("hostname", "family", "addrsV4", "addrsV6", "aCacheHits")) \
+ PROBE(ResolveDone, \
+ GROUPS(), \
+ TYPES(TString, int, TString, TString), \
+ NAMES("hostname", "family", "addrsV4", "addrsV6")) \
+ PROBE(ResolveCacheTimeout, \
+ GROUPS(), \
+ TYPES(TString), \
+ NAMES("hostname")) \
+ PROBE(ResolveCacheNew, \
+ GROUPS(), \
+ TYPES(TString), \
+ NAMES("hostname")) \
+ /**/
+
+LWTRACE_DECLARE_PROVIDER(DNSCACHELIB_PROVIDER)
diff --git a/library/cpp/actors/dnscachelib/timekeeper.h b/library/cpp/actors/dnscachelib/timekeeper.h
new file mode 100644
index 0000000000..0528d8549c
--- /dev/null
+++ b/library/cpp/actors/dnscachelib/timekeeper.h
@@ -0,0 +1,70 @@
+#pragma once
+
+#include <util/datetime/base.h>
+#include <util/generic/singleton.h>
+#include <util/string/cast.h>
+#include <util/system/thread.h>
+#include <util/system/event.h>
+#include <util/system/env.h>
+
+#include <cstdlib>
+
+/* Keeps current time accurate up to 1/10 second */
+
+class TTimeKeeper {
+public:
+ static TInstant GetNow(void) {
+ return TInstant::MicroSeconds(GetTime());
+ }
+
+ static time_t GetTime(void) {
+ return Singleton<TTimeKeeper>()->CurrentTime.tv_sec;
+ }
+
+ static const struct timeval& GetTimeval(void) {
+ return Singleton<TTimeKeeper>()->CurrentTime;
+ }
+
+ TTimeKeeper()
+ : Thread(&TTimeKeeper::Worker, this)
+ {
+ ConstTime = !!GetEnv("TEST_TIME");
+ if (ConstTime) {
+ try {
+ CurrentTime.tv_sec = FromString<ui32>(GetEnv("TEST_TIME"));
+ } catch (const TFromStringException&) {
+ ConstTime = false;
+ }
+ }
+ if (!ConstTime) {
+ gettimeofday(&CurrentTime, nullptr);
+ Thread.Start();
+ }
+ }
+
+ ~TTimeKeeper() {
+ if (!ConstTime) {
+ Exit.Signal();
+ Thread.Join();
+ }
+ }
+
+private:
+ static const ui32 UpdateInterval = 100000;
+ struct timeval CurrentTime;
+ bool ConstTime;
+ TSystemEvent Exit;
+ TThread Thread;
+
+ static void* Worker(void* arg) {
+ TTimeKeeper* owner = static_cast<TTimeKeeper*>(arg);
+
+ do {
+ /* Race condition may occur here but locking looks too expensive */
+
+ gettimeofday(&owner->CurrentTime, nullptr);
+ } while (!owner->Exit.WaitT(TDuration::MicroSeconds(UpdateInterval)));
+
+ return nullptr;
+ }
+};
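+
+// Usage sketch (illustrative, not part of the original sources): coarse
+// timestamps without a gettimeofday() call on every request.
+//
+// TInstant now = TTimeKeeper::GetNow(); // ~0.1 s resolution
+// time_t unixTime = TTimeKeeper::GetTime(); // same clock, as time_t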
diff --git a/library/cpp/actors/dnscachelib/ya.make b/library/cpp/actors/dnscachelib/ya.make
new file mode 100644
index 0000000000..e3a6ad6202
--- /dev/null
+++ b/library/cpp/actors/dnscachelib/ya.make
@@ -0,0 +1,24 @@
+LIBRARY()
+
+OWNER(
+ davenger
+ fomichev
+ serxa
+ dimanne
+ single
+)
+
+SRCS(
+ dnscache.cpp
+ dnscache.h
+ probes.cpp
+ probes.h
+ timekeeper.h
+)
+
+PEERDIR(
+ contrib/libs/c-ares
+ library/cpp/lwtrace
+)
+
+END()
diff --git a/library/cpp/actors/dnsresolver/dnsresolver.cpp b/library/cpp/actors/dnsresolver/dnsresolver.cpp
new file mode 100644
index 0000000000..6329bb0083
--- /dev/null
+++ b/library/cpp/actors/dnsresolver/dnsresolver.cpp
@@ -0,0 +1,475 @@
+#include "dnsresolver.h"
+
+#include <library/cpp/actors/core/hfunc.h>
+#include <library/cpp/threading/queue/mpsc_htswap.h>
+#include <util/network/pair.h>
+#include <util/network/socket.h>
+#include <util/string/builder.h>
+#include <util/system/thread.h>
+
+#include <ares.h>
+
+#include <atomic>
+
+namespace NActors {
+namespace NDnsResolver {
+
+ class TAresLibraryInitBase {
+ protected:
+ TAresLibraryInitBase() noexcept {
+ int status = ares_library_init(ARES_LIB_INIT_ALL);
+ Y_VERIFY(status == ARES_SUCCESS, "Unexpected failure to initialize c-ares library");
+ }
+
+ ~TAresLibraryInitBase() noexcept {
+ ares_library_cleanup();
+ }
+ };
+
+ class TCallbackQueueBase {
+ protected:
+ TCallbackQueueBase() noexcept {
+ int err = SocketPair(Sockets, false, true);
+ Y_VERIFY(err == 0, "Unexpected failure to create a socket pair");
+ SetNonBlock(Sockets[0]);
+ SetNonBlock(Sockets[1]);
+ }
+
+ ~TCallbackQueueBase() noexcept {
+ closesocket(Sockets[0]);
+ closesocket(Sockets[1]);
+ }
+
+ protected:
+ using TCallback = std::function<void()>;
+ using TCallbackQueue = NThreading::THTSwapQueue<TCallback>;
+
+ void PushCallback(TCallback callback) {
+ Y_VERIFY(callback, "Cannot push an empty callback");
+ CallbackQueue.Push(std::move(callback)); // this is a lockfree queue
+
+ // Wake up worker thread on the first activation
+ if (Activations.fetch_add(1, std::memory_order_acq_rel) == 0) {
+ char ch = 'x';
+ ssize_t ret;
+#ifdef _win_
+ ret = send(SignalSock(), &ch, 1, 0);
+ if (ret == -1) {
+ Y_VERIFY(WSAGetLastError() == WSAEWOULDBLOCK, "Unexpected send error");
+ return;
+ }
+#else
+ do {
+ ret = send(SignalSock(), &ch, 1, 0);
+ } while (ret == -1 && errno == EINTR);
+ if (ret == -1) {
+ Y_VERIFY(errno == EAGAIN || errno == EWOULDBLOCK, "Unexpected send error");
+ return;
+ }
+#endif
+ Y_VERIFY(ret == 1, "Unexpected send result");
+ }
+ }
+
+ void RunCallbacks() noexcept {
+ char ch[32];
+ ssize_t ret;
+ bool signalled = false;
+ for (;;) {
+ ret = recv(WaitSock(), ch, sizeof(ch), 0);
+ if (ret > 0) {
+ signalled = true;
+ }
+ if (ret == sizeof(ch)) {
+ continue;
+ }
+ if (ret != -1) {
+ break;
+ }
+#ifdef _win_
+ if (WSAGetLastError() == WSAEWOULDBLOCK) {
+ break;
+ }
+ Y_FAIL("Unexpected recv error");
+#else
+ if (errno == EAGAIN || errno == EWOULDBLOCK) {
+ break;
+ }
+ Y_VERIFY(errno == EINTR, "Unexpected recv error");
+#endif
+ }
+
+ if (signalled) {
+ // There's exactly one write to SignalSock while Activations != 0
+ // It's impossible to get signalled while Activations == 0
+ // We must set Activations = 0 to receive new signals
+ size_t count = Activations.exchange(0, std::memory_order_acq_rel);
+ Y_VERIFY(count != 0);
+
+ // N.B. due to the way HTSwap works we may not be able to pop
+ // all callbacks on this activation, however we expect a new
+ // delayed activation to happen at a later time.
+ while (auto callback = CallbackQueue.Pop()) {
+ callback();
+ }
+ }
+ }
+
+ SOCKET SignalSock() {
+ return Sockets[0];
+ }
+
+ SOCKET WaitSock() {
+ return Sockets[1];
+ }
+
+ private:
+ SOCKET Sockets[2];
+ TCallbackQueue CallbackQueue;
+ std::atomic<size_t> Activations{ 0 };
+ };
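+
+ // Illustrative sketch (not part of the original sources) of the intended
+ // cross-thread handshake built on the socket pair above:
+ //
+ // producer thread: PushCallback([]{ /* runs on the worker thread */ });
+ // worker thread: poll WaitSock() for POLLIN, then call RunCallbacks();
+ //
+ // The Activations counter guarantees at most one pending byte on the
+ // signal socket, so a burst of callbacks costs a single wakeup.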
+
+ class TSimpleDnsResolver
+ : public TActor<TSimpleDnsResolver>
+ , private TAresLibraryInitBase
+ , private TCallbackQueueBase
+ {
+ public:
+ TSimpleDnsResolver(TSimpleDnsResolverOptions options) noexcept
+ : TActor(&TThis::StateWork)
+ , Options(std::move(options))
+ , WorkerThread(&TThis::WorkerThreadStart, this)
+ {
+ InitAres();
+
+ WorkerThread.Start();
+ }
+
+ ~TSimpleDnsResolver() noexcept override {
+ if (!Stopped) {
+ PushCallback([this] {
+ // Mark as stopped first
+ Stopped = true;
+
+ // Cancel all current ares requests (will not send replies)
+ ares_cancel(AresChannel);
+ });
+
+ WorkerThread.Join();
+ }
+
+ StopAres();
+ }
+
+ static constexpr EActivityType ActorActivityType() {
+ return DNS_RESOLVER;
+ }
+
+ private:
+ void InitAres() noexcept {
+ struct ares_options options;
+ memset(&options, 0, sizeof(options));
+ int optmask = 0;
+
+ options.flags = ARES_FLAG_STAYOPEN;
+ optmask |= ARES_OPT_FLAGS;
+
+ options.sock_state_cb = &TThis::SockStateCallback;
+ options.sock_state_cb_data = this;
+ optmask |= ARES_OPT_SOCK_STATE_CB;
+
+ options.timeout = Options.Timeout.MilliSeconds();
+ if (options.timeout > 0) {
+ optmask |= ARES_OPT_TIMEOUTMS;
+ }
+
+ options.tries = Options.Attempts;
+ if (options.tries > 0) {
+ optmask |= ARES_OPT_TRIES;
+ }
+
+ int err = ares_init_options(&AresChannel, &options, optmask);
+ Y_VERIFY(err == 0, "Unexpected failure to initialize c-ares channel");
+
+ if (Options.Servers) {
+ TStringBuilder csv;
+ for (const TString& server : Options.Servers) {
+ if (csv) {
+ csv << ',';
+ }
+ csv << server;
+ }
+ err = ares_set_servers_ports_csv(AresChannel, csv.c_str());
+ Y_VERIFY(err == 0, "Unexpected failure to set a list of dns servers: %s", ares_strerror(err));
+ }
+ }
+
+ void StopAres() noexcept {
+ // Destroy the ares channel
+ ares_destroy(AresChannel);
+ AresChannel = nullptr;
+ }
+
+ private:
+ STRICT_STFUNC(StateWork, {
+ hFunc(TEvents::TEvPoison, Handle);
+ hFunc(TEvDns::TEvGetHostByName, Handle);
+ hFunc(TEvDns::TEvGetAddr, Handle);
+ })
+
+ void Handle(TEvents::TEvPoison::TPtr&) {
+ Y_VERIFY(!Stopped);
+
+ PushCallback([this] {
+ // Cancel all current ares requests (will send notifications)
+ ares_cancel(AresChannel);
+
+ // Mark as stopped last
+ Stopped = true;
+ });
+
+ WorkerThread.Join();
+ PassAway();
+ }
+
+ private:
+ enum class ERequestType {
+ GetHostByName,
+ GetAddr,
+ };
+
+ struct TRequestContext : public TThrRefBase {
+ using TPtr = TIntrusivePtr<TRequestContext>;
+
+ TThis* Self;
+ TActorSystem* ActorSystem;
+ TActorId SelfId;
+ TActorId Sender;
+ ui64 Cookie;
+ ERequestType Type;
+
+ TRequestContext(TThis* self, TActorSystem* as, TActorId selfId, TActorId sender, ui64 cookie, ERequestType type)
+ : Self(self)
+ , ActorSystem(as)
+ , SelfId(selfId)
+ , Sender(sender)
+ , Cookie(cookie)
+ , Type(type)
+ { }
+ };
+
+ private:
+ void Handle(TEvDns::TEvGetHostByName::TPtr& ev) {
+ auto* msg = ev->Get();
+ auto reqCtx = MakeIntrusive<TRequestContext>(
+ this, TActivationContext::ActorSystem(), SelfId(), ev->Sender, ev->Cookie, ERequestType::GetHostByName);
+ PushCallback([this, reqCtx = std::move(reqCtx), name = std::move(msg->Name), family = msg->Family] () mutable {
+ StartGetHostByName(std::move(reqCtx), std::move(name), family);
+ });
+ }
+
+ void Handle(TEvDns::TEvGetAddr::TPtr& ev) {
+ auto* msg = ev->Get();
+ auto reqCtx = MakeIntrusive<TRequestContext>(
+ this, TActivationContext::ActorSystem(), SelfId(), ev->Sender, ev->Cookie, ERequestType::GetAddr);
+ PushCallback([this, reqCtx = std::move(reqCtx), name = std::move(msg->Name), family = msg->Family] () mutable {
+ StartGetHostByName(std::move(reqCtx), std::move(name), family);
+ });
+ }
+
+ void StartGetHostByName(TRequestContext::TPtr reqCtx, TString name, int family) noexcept {
+ reqCtx->Ref();
+ ares_gethostbyname(AresChannel, name.c_str(), family,
+ &TThis::GetHostByNameAresCallback, reqCtx.Get());
+ }
+
+ private:
+ static void GetHostByNameAresCallback(void* arg, int status, int timeouts, struct hostent* info) {
+ Y_UNUSED(timeouts);
+ TRequestContext::TPtr reqCtx(static_cast<TRequestContext*>(arg));
+ reqCtx->UnRef();
+
+ if (reqCtx->Self->Stopped) {
+ // Don't send any replies after destruction
+ return;
+ }
+
+ switch (reqCtx->Type) {
+ case ERequestType::GetHostByName: {
+ auto result = MakeHolder<TEvDns::TEvGetHostByNameResult>();
+ if (status == 0) {
+ switch (info->h_addrtype) {
+ case AF_INET: {
+ for (int i = 0; info->h_addr_list[i] != nullptr; ++i) {
+ result->AddrsV4.emplace_back(*(struct in_addr*)(info->h_addr_list[i]));
+ }
+ break;
+ }
+ case AF_INET6: {
+ for (int i = 0; info->h_addr_list[i] != nullptr; ++i) {
+ result->AddrsV6.emplace_back(*(struct in6_addr*)(info->h_addr_list[i]));
+ }
+ break;
+ }
+ default:
+ Y_FAIL("unknown address family in ares callback");
+ }
+ } else {
+ result->ErrorText = ares_strerror(status);
+ }
+ result->Status = status;
+
+ reqCtx->ActorSystem->Send(new IEventHandle(reqCtx->Sender, reqCtx->SelfId, result.Release(), 0, reqCtx->Cookie));
+ break;
+ }
+
+ case ERequestType::GetAddr: {
+ auto result = MakeHolder<TEvDns::TEvGetAddrResult>();
+ if (status == 0 && Y_UNLIKELY(info->h_addr_list[0] == nullptr)) {
+ status = ARES_ENODATA;
+ }
+ if (status == 0) {
+ switch (info->h_addrtype) {
+ case AF_INET: {
+ result->Addr = *(struct in_addr*)(info->h_addr_list[0]);
+ break;
+ }
+ case AF_INET6: {
+ result->Addr = *(struct in6_addr*)(info->h_addr_list[0]);
+ break;
+ }
+ default:
+ Y_FAIL("unknown address family in ares callback");
+ }
+ } else {
+ result->ErrorText = ares_strerror(status);
+ }
+ result->Status = status;
+
+ reqCtx->ActorSystem->Send(new IEventHandle(reqCtx->Sender, reqCtx->SelfId, result.Release(), 0, reqCtx->Cookie));
+ break;
+ }
+ }
+ }
+
+ private:
+ static void SockStateCallback(void* data, ares_socket_t socket_fd, int readable, int writable) {
+ static_cast<TThis*>(data)->DoSockStateCallback(socket_fd, readable, writable);
+ }
+
+ void DoSockStateCallback(ares_socket_t socket_fd, int readable, int writable) noexcept {
+ int events = (readable ? (POLLRDNORM | POLLIN) : 0) | (writable ? (POLLWRNORM | POLLOUT) : 0);
+ if (events == 0) {
+ AresSockStates.erase(socket_fd);
+ } else {
+ AresSockStates[socket_fd].NeededEvents = events;
+ }
+ }
+
+ private:
+ static void* WorkerThreadStart(void* arg) noexcept {
+ static_cast<TSimpleDnsResolver*>(arg)->WorkerThreadLoop();
+ return nullptr;
+ }
+
+ void WorkerThreadLoop() noexcept {
+ TThread::SetCurrentThreadName("DnsResolver");
+
+ TVector<struct pollfd> fds;
+ while (!Stopped) {
+ fds.clear();
+ fds.reserve(1 + AresSockStates.size());
+ {
+ auto& entry = fds.emplace_back();
+ entry.fd = WaitSock();
+ entry.events = POLLRDNORM | POLLIN;
+ }
+ for (auto& kv : AresSockStates) {
+ auto& entry = fds.emplace_back();
+ entry.fd = kv.first;
+ entry.events = kv.second.NeededEvents;
+ }
+
+ int timeout = -1;
+ struct timeval tv;
+ if (ares_timeout(AresChannel, nullptr, &tv)) {
+ timeout = tv.tv_sec * 1000 + tv.tv_usec / 1000;
+ }
+
+ int ret = poll(fds.data(), fds.size(), timeout);
+ if (ret == -1) {
+ if (errno == EINTR) {
+ continue;
+ }
+ // We cannot handle poll failures; run callbacks and pretend everything is OK
+ RunCallbacks();
+ if (Stopped) {
+ break;
+ }
+ ret = 0;
+ }
+
+ bool ares_called = false;
+ if (ret > 0) {
+ for (size_t i = 0; i < fds.size(); ++i) {
+ auto& entry = fds[i];
+
+ // Handle WaitSock activation and run callbacks
+ if (i == 0) {
+ if (entry.revents & (POLLRDNORM | POLLIN)) {
+ RunCallbacks();
+ if (Stopped) {
+ break;
+ }
+ }
+ continue;
+ }
+
+ // All other sockets belong to ares
+ if (entry.revents == 0) {
+ continue;
+ }
+ // Previous invocation of ares_process_fd might have removed some sockets
+ if (Y_UNLIKELY(!AresSockStates.contains(entry.fd))) {
+ continue;
+ }
+ ares_process_fd(
+ AresChannel,
+ entry.revents & (POLLRDNORM | POLLIN) ? entry.fd : ARES_SOCKET_BAD,
+ entry.revents & (POLLWRNORM | POLLOUT) ? entry.fd : ARES_SOCKET_BAD);
+ ares_called = true;
+ }
+
+ if (Stopped) {
+ break;
+ }
+ }
+
+ if (!ares_called) {
+ // Let ares handle timeouts
+ ares_process_fd(AresChannel, ARES_SOCKET_BAD, ARES_SOCKET_BAD);
+ }
+ }
+ }
+
+ private:
+ struct TSockState {
+ short NeededEvents = 0; // poll events
+ };
+
+ private:
+ TSimpleDnsResolverOptions Options;
+ TThread WorkerThread;
+
+ ares_channel AresChannel;
+ THashMap<SOCKET, TSockState> AresSockStates;
+
+ bool Stopped = false;
+ };
+
+ IActor* CreateSimpleDnsResolver(TSimpleDnsResolverOptions options) {
+ return new TSimpleDnsResolver(std::move(options));
+ }
+
+} // namespace NDnsResolver
+} // namespace NActors
diff --git a/library/cpp/actors/dnsresolver/dnsresolver.h b/library/cpp/actors/dnsresolver/dnsresolver.h
new file mode 100644
index 0000000000..88fc74df7d
--- /dev/null
+++ b/library/cpp/actors/dnsresolver/dnsresolver.h
@@ -0,0 +1,128 @@
+#pragma once
+
+#include <library/cpp/actors/core/actor.h>
+#include <library/cpp/actors/core/events.h>
+#include <library/cpp/actors/core/event_local.h>
+#include <library/cpp/monlib/dynamic_counters/counters.h>
+#include <util/network/address.h>
+#include <variant>
+
+namespace NActors {
+namespace NDnsResolver {
+
+ struct TEvDns {
+ enum EEv {
+ EvGetHostByName = EventSpaceBegin(TEvents::ES_DNS),
+ EvGetHostByNameResult,
+ EvGetAddr,
+ EvGetAddrResult,
+ };
+
+ /**
+ * TEvGetHostByName returns the result of ares_gethostbyname
+ */
+ struct TEvGetHostByName : public TEventLocal<TEvGetHostByName, EvGetHostByName> {
+ TString Name;
+ int Family;
+
+ explicit TEvGetHostByName(TString name, int family = AF_UNSPEC)
+ : Name(std::move(name))
+ , Family(family)
+ { }
+ };
+
+ struct TEvGetHostByNameResult : public TEventLocal<TEvGetHostByNameResult, EvGetHostByNameResult> {
+ TVector<struct in_addr> AddrsV4;
+ TVector<struct in6_addr> AddrsV6;
+ TString ErrorText;
+ int Status = 0;
+ };
+
+ /**
+ * TEvGetAddr returns a single address for a given hostname
+ */
+ struct TEvGetAddr : public TEventLocal<TEvGetAddr, EvGetAddr> {
+ TString Name;
+ int Family;
+
+ explicit TEvGetAddr(TString name, int family = AF_UNSPEC)
+ : Name(std::move(name))
+ , Family(family)
+ { }
+ };
+
+ struct TEvGetAddrResult : public TEventLocal<TEvGetAddrResult, EvGetAddrResult> {
+ // N.B. "using" here doesn't work with Visual Studio compiler
+ typedef struct in6_addr TIPv6Addr;
+ typedef struct in_addr TIPv4Addr;
+
+ std::variant<std::monostate, TIPv6Addr, TIPv4Addr> Addr;
+ TString ErrorText;
+ int Status = 0;
+
+ bool IsV6() const {
+ return std::holds_alternative<TIPv6Addr>(Addr);
+ }
+
+ bool IsV4() const {
+ return std::holds_alternative<TIPv4Addr>(Addr);
+ }
+
+ const TIPv6Addr& GetAddrV6() const {
+ const TIPv6Addr* p = std::get_if<TIPv6Addr>(&Addr);
+ Y_VERIFY(p, "Result is not an ipv6 address");
+ return *p;
+ }
+
+ const TIPv4Addr& GetAddrV4() const {
+ const TIPv4Addr* p = std::get_if<TIPv4Addr>(&Addr);
+ Y_VERIFY(p, "Result is not an ipv4 address");
+ return *p;
+ }
+ };
+ };
+
+ struct TSimpleDnsResolverOptions {
+ // Initial per-server timeout, grows exponentially with each retry
+ TDuration Timeout = TDuration::Seconds(1);
+ // Number of attempts per-server
+ int Attempts = 2;
+ // Optional list of custom dns servers (ip.v4[:port], ip::v6 or [ip::v6]:port format)
+ TVector<TString> Servers;
+ };
+
+ IActor* CreateSimpleDnsResolver(TSimpleDnsResolverOptions options = TSimpleDnsResolverOptions());
+
+ struct TCachingDnsResolverOptions {
+ // Soft expire time specifies the delay before a name is refreshed in the background
+ TDuration SoftNegativeExpireTime = TDuration::Seconds(1);
+ TDuration SoftPositiveExpireTime = TDuration::Seconds(10);
+ // Hard expire time specifies the delay before the last result is forgotten
+ TDuration HardNegativeExpireTime = TDuration::Seconds(10);
+ TDuration HardPositiveExpireTime = TDuration::Hours(2);
+ // Allow these request families
+ bool AllowIPv6 = true;
+ bool AllowIPv4 = true;
+ // Optional counters
+ NMonitoring::TDynamicCounterPtr MonCounters = nullptr;
+ };
+
+ IActor* CreateCachingDnsResolver(TActorId upstream, TCachingDnsResolverOptions options = TCachingDnsResolverOptions());
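+
+ /* Construction sketch (illustrative, not part of the original sources;
+ * `simpleId` is a placeholder for the actor id of the registered simple
+ * resolver used upstream):
+ *
+ * TCachingDnsResolverOptions opts;
+ * opts.SoftPositiveExpireTime = TDuration::Seconds(5); // refresh sooner
+ * IActor* caching = CreateCachingDnsResolver(simpleId, opts);
+ */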
+
+ struct TOnDemandDnsResolverOptions
+ : public TSimpleDnsResolverOptions
+ , public TCachingDnsResolverOptions
+ {
+ };
+
+ IActor* CreateOnDemandDnsResolver(TOnDemandDnsResolverOptions options = TOnDemandDnsResolverOptions());
+
+ /**
+ * Returns actor id of a globally registered dns resolver
+ */
+ inline TActorId MakeDnsResolverActorId() {
+ return TActorId(0, TStringBuf("dnsresolver"));
+ }
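+
+ /* Usage sketch from inside an actor (illustrative, not part of the
+ * original sources; the host name is a placeholder):
+ *
+ * Send(MakeDnsResolverActorId(),
+ * new TEvDns::TEvGetAddr("db.example.net", AF_UNSPEC));
+ *
+ * void Handle(TEvDns::TEvGetAddrResult::TPtr& ev) {
+ * if (ev->Get()->Status == 0 && ev->Get()->IsV4()) { ... }
+ * }
+ */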
+
+} // namespace NDnsResolver
+} // namespace NActors
diff --git a/library/cpp/actors/dnsresolver/dnsresolver_caching.cpp b/library/cpp/actors/dnsresolver/dnsresolver_caching.cpp
new file mode 100644
index 0000000000..02760f4c27
--- /dev/null
+++ b/library/cpp/actors/dnsresolver/dnsresolver_caching.cpp
@@ -0,0 +1,730 @@
+#include "dnsresolver.h"
+
+#include <library/cpp/actors/core/hfunc.h>
+#include <util/generic/intrlist.h>
+
+#include <ares.h>
+
+#include <queue>
+
+namespace NActors {
+namespace NDnsResolver {
+
+ class TCachingDnsResolver : public TActor<TCachingDnsResolver> {
+ public:
+ struct TMonCounters {
+ NMonitoring::TDynamicCounters::TCounterPtr OutgoingInFlightV4;
+ NMonitoring::TDynamicCounters::TCounterPtr OutgoingInFlightV6;
+ NMonitoring::TDynamicCounters::TCounterPtr OutgoingErrorsV4;
+ NMonitoring::TDynamicCounters::TCounterPtr OutgoingErrorsV6;
+ NMonitoring::TDynamicCounters::TCounterPtr OutgoingTotalV4;
+ NMonitoring::TDynamicCounters::TCounterPtr OutgoingTotalV6;
+
+ NMonitoring::TDynamicCounters::TCounterPtr IncomingInFlight;
+ NMonitoring::TDynamicCounters::TCounterPtr IncomingErrors;
+ NMonitoring::TDynamicCounters::TCounterPtr IncomingTotal;
+
+ NMonitoring::TDynamicCounters::TCounterPtr CacheSize;
+ NMonitoring::TDynamicCounters::TCounterPtr CacheHits;
+ NMonitoring::TDynamicCounters::TCounterPtr CacheMisses;
+
+ TMonCounters(const NMonitoring::TDynamicCounterPtr& counters)
+ : OutgoingInFlightV4(counters->GetCounter("DnsResolver/Outgoing/InFlight/V4", false))
+ , OutgoingInFlightV6(counters->GetCounter("DnsResolver/Outgoing/InFlight/V6", false))
+ , OutgoingErrorsV4(counters->GetCounter("DnsResolver/Outgoing/Errors/V4", true))
+ , OutgoingErrorsV6(counters->GetCounter("DnsResolver/Outgoing/Errors/V6", true))
+ , OutgoingTotalV4(counters->GetCounter("DnsResolver/Outgoing/Total/V4", true))
+ , OutgoingTotalV6(counters->GetCounter("DnsResolver/Outgoing/Total/V6", true))
+ , IncomingInFlight(counters->GetCounter("DnsResolver/Incoming/InFlight", false))
+ , IncomingErrors(counters->GetCounter("DnsResolver/Incoming/Errors", true))
+ , IncomingTotal(counters->GetCounter("DnsResolver/Incoming/Total", true))
+ , CacheSize(counters->GetCounter("DnsResolver/Cache/Size", false))
+ , CacheHits(counters->GetCounter("DnsResolver/Cache/Hits", true))
+ , CacheMisses(counters->GetCounter("DnsResolver/Cache/Misses", true))
+ { }
+ };
+
+ public:
+ TCachingDnsResolver(TActorId upstream, TCachingDnsResolverOptions options)
+ : TActor(&TThis::StateWork)
+ , Upstream(upstream)
+ , Options(std::move(options))
+ , MonCounters(Options.MonCounters ? new TMonCounters(Options.MonCounters) : nullptr)
+ { }
+
+ static constexpr EActivityType ActorActivityType() {
+ return DNS_RESOLVER;
+ }
+
+ private:
+ STRICT_STFUNC(StateWork, {
+ hFunc(TEvents::TEvPoison, Handle);
+ hFunc(TEvDns::TEvGetHostByName, Handle);
+ hFunc(TEvDns::TEvGetAddr, Handle);
+ hFunc(TEvDns::TEvGetHostByNameResult, Handle);
+ hFunc(TEvents::TEvUndelivered, Handle);
+ });
+
+ void Handle(TEvents::TEvPoison::TPtr&) {
+ DropPending(ARES_ECANCELLED);
+ PassAway();
+ }
+
+ void Handle(TEvDns::TEvGetHostByName::TPtr& ev) {
+ auto req = MakeHolder<TIncomingRequest>();
+ req->Type = EIncomingRequestType::GetHostByName;
+ req->Sender = ev->Sender;
+ req->Cookie = ev->Cookie;
+ req->Name = std::move(ev->Get()->Name);
+ req->Family = ev->Get()->Family;
+ EnqueueRequest(std::move(req));
+ }
+
+ void Handle(TEvDns::TEvGetAddr::TPtr& ev) {
+ auto req = MakeHolder<TIncomingRequest>();
+ req->Type = EIncomingRequestType::GetAddr;
+ req->Sender = ev->Sender;
+ req->Cookie = ev->Cookie;
+ req->Name = std::move(ev->Get()->Name);
+ req->Family = ev->Get()->Family;
+ EnqueueRequest(std::move(req));
+ }
+
+ void Handle(TEvDns::TEvGetHostByNameResult::TPtr& ev) {
+ auto waitingIt = WaitingRequests.find(ev->Cookie);
+ Y_VERIFY(waitingIt != WaitingRequests.end(), "Unexpected reply, reqId=%" PRIu64, ev->Cookie);
+ auto waitingInfo = waitingIt->second;
+ WaitingRequests.erase(waitingIt);
+
+ switch (waitingInfo.Family) {
+ case AF_INET6:
+ if (ev->Get()->Status) {
+ ProcessErrorV6(waitingInfo.Position, ev->Get()->Status, std::move(ev->Get()->ErrorText));
+ } else {
+ ProcessAddrsV6(waitingInfo.Position, std::move(ev->Get()->AddrsV6));
+ }
+ break;
+
+ case AF_INET:
+ if (ev->Get()->Status) {
+ ProcessErrorV4(waitingInfo.Position, ev->Get()->Status, std::move(ev->Get()->ErrorText));
+ } else {
+ ProcessAddrsV4(waitingInfo.Position, std::move(ev->Get()->AddrsV4));
+ }
+ break;
+
+ default:
+ Y_FAIL("Unexpected request family %d", waitingInfo.Family);
+ }
+ }
+
+ void Handle(TEvents::TEvUndelivered::TPtr& ev) {
+ switch (ev->Get()->SourceType) {
+ case TEvDns::TEvGetHostByName::EventType: {
+ auto waitingIt = WaitingRequests.find(ev->Cookie);
+ Y_VERIFY(waitingIt != WaitingRequests.end(), "Unexpected TEvUndelivered, reqId=%" PRIu64, ev->Cookie);
+ auto waitingInfo = waitingIt->second;
+ WaitingRequests.erase(waitingIt);
+
+ switch (waitingInfo.Family) {
+ case AF_INET6:
+ ProcessErrorV6(waitingInfo.Position, ARES_ENOTINITIALIZED, "Caching dns resolver cannot deliver to the underlying resolver");
+ break;
+ case AF_INET:
+ ProcessErrorV4(waitingInfo.Position, ARES_ENOTINITIALIZED, "Caching dns resolver cannot deliver to the underlying resolver");
+ break;
+ default:
+ Y_FAIL("Unexpected request family %d", waitingInfo.Family);
+ }
+
+ break;
+ }
+
+ default:
+ Y_FAIL("Unexpected TEvUndelievered, type=%" PRIu32, ev->Get()->SourceType);
+ }
+ }
+
+ private:
+ enum EIncomingRequestType {
+ GetHostByName,
+ GetAddr,
+ };
+
+ struct TIncomingRequest : public TIntrusiveListItem<TIncomingRequest> {
+ EIncomingRequestType Type;
+ TActorId Sender;
+ ui64 Cookie;
+ TString Name;
+ int Family;
+ };
+
+ using TIncomingRequestList = TIntrusiveListWithAutoDelete<TIncomingRequest, TDelete>;
+
+ void EnqueueRequest(THolder<TIncomingRequest> req) {
+ if (MonCounters) {
+ ++*MonCounters->IncomingTotal;
+ }
+
+ CleanupExpired(TActivationContext::Now());
+
+ switch (req->Family) {
+ case AF_UNSPEC:
+ if (Options.AllowIPv6) {
+ EnqueueRequestIPv6(std::move(req));
+ return;
+ }
+ if (Options.AllowIPv4) {
+ EnqueueRequestIPv4(std::move(req));
+ return;
+ }
+ break;
+
+ case AF_INET6:
+ if (Options.AllowIPv6) {
+ EnqueueRequestIPv6(std::move(req));
+ return;
+ }
+ break;
+
+ case AF_INET:
+ if (Options.AllowIPv4) {
+ EnqueueRequestIPv4(std::move(req));
+ return;
+ }
+ break;
+ }
+
+ ReplyWithError(std::move(req), ARES_EBADFAMILY);
+ }
+
+ void EnqueueRequestIPv6(THolder<TIncomingRequest> req) {
+ auto now = TActivationContext::Now();
+
+ auto& fullState = NameToState[req->Name];
+ if (MonCounters) {
+ *MonCounters->CacheSize = NameToState.size();
+ }
+
+ auto& state = fullState.StateIPv6;
+ EnsureRequest(state, req->Name, AF_INET6, now);
+
+ if (state.IsHardExpired(now)) {
+ Y_VERIFY(state.Waiting);
+ if (MonCounters) {
+ ++*MonCounters->CacheMisses;
+ }
+ // We need to wait for ipv6 reply, schedule ipv4 request in parallel if needed
+ if (Options.AllowIPv4) {
+ EnsureRequest(fullState.StateIPv4, req->Name, AF_INET, now);
+ }
+ state.WaitingRequests.PushBack(req.Release());
+ return;
+ }
+
+ // We want to retry AF_UNSPEC with IPv4 in some cases
+ if (req->Family == AF_UNSPEC && Options.AllowIPv4 && state.RetryUnspec()) {
+ EnqueueRequestIPv4(std::move(req));
+ return;
+ }
+
+ if (MonCounters) {
+ ++*MonCounters->CacheHits;
+ }
+
+ if (state.Status != 0) {
+ ReplyWithError(std::move(req), state.Status, state.ErrorText);
+ } else {
+ ReplyWithAddrs(std::move(req), fullState.AddrsIPv6);
+ }
+ }
+
+ void EnqueueRequestIPv4(THolder<TIncomingRequest> req, bool isCacheMiss = false) {
+ auto now = TActivationContext::Now();
+
+ auto& fullState = NameToState[req->Name];
+ if (MonCounters) {
+ *MonCounters->CacheSize = NameToState.size();
+ }
+
+ auto& state = fullState.StateIPv4;
+ EnsureRequest(state, req->Name, AF_INET, now);
+
+ if (state.IsHardExpired(now)) {
+ Y_VERIFY(state.Waiting);
+ if (MonCounters && !isCacheMiss) {
+ ++*MonCounters->CacheMisses;
+ }
+ state.WaitingRequests.PushBack(req.Release());
+ return;
+ }
+
+ if (MonCounters && !isCacheMiss) {
+ ++*MonCounters->CacheHits;
+ }
+
+ if (state.Status != 0) {
+ ReplyWithError(std::move(req), state.Status, state.ErrorText);
+ } else {
+ ReplyWithAddrs(std::move(req), fullState.AddrsIPv4);
+ }
+ }
+
+ private:
+ struct TFamilyState {
+ TIncomingRequestList WaitingRequests;
+ TInstant SoftDeadline;
+ TInstant HardDeadline;
+ TInstant NextSoftDeadline;
+ TInstant NextHardDeadline;
+ TString ErrorText;
+ int Status = -1; // never requested before
+ bool InSoftHeap = false;
+ bool InHardHeap = false;
+ bool Waiting = false;
+
+ bool Needed() const {
+ return InSoftHeap || InHardHeap || Waiting;
+ }
+
+ bool RetryUnspec() const {
+ return (
+ Status == ARES_ENODATA ||
+ Status == ARES_EBADRESP ||
+ Status == ARES_ETIMEOUT);
+ }
+
+ bool ServerReplied() const {
+ return ServerReplied(Status);
+ }
+
+ bool IsSoftExpired(TInstant now) const {
+ return !InSoftHeap || NextSoftDeadline < now;
+ }
+
+ bool IsHardExpired(TInstant now) const {
+ return !InHardHeap || NextHardDeadline < now;
+ }
+
+ static bool ServerReplied(int status) {
+ return (
+ status == ARES_SUCCESS ||
+ status == ARES_ENODATA ||
+ status == ARES_ENOTFOUND);
+ }
+ };
+
+ struct TState {
+ TFamilyState StateIPv6;
+ TFamilyState StateIPv4;
+ TVector<struct in6_addr> AddrsIPv6;
+ TVector<struct in_addr> AddrsIPv4;
+
+ bool Needed() const {
+ return StateIPv6.Needed() || StateIPv4.Needed();
+ }
+ };
+
+ using TNameToState = THashMap<TString, TState>;
+
+ template<const TFamilyState TState::* StateToFamily,
+ const TInstant TFamilyState::* FamilyToDeadline>
+ struct THeapCompare {
+ // returns true when b < a
+ bool operator()(TNameToState::iterator a, TNameToState::iterator b) const {
+ const TState& aState = a->second;
+ const TState& bState = b->second;
+ const TFamilyState& aFamily = aState.*StateToFamily;
+ const TFamilyState& bFamily = bState.*StateToFamily;
+ const TInstant& aDeadline = aFamily.*FamilyToDeadline;
+ const TInstant& bDeadline = bFamily.*FamilyToDeadline;
+ return bDeadline < aDeadline;
+ }
+ };
+
+ template<const TFamilyState TState::* StateToFamily,
+ const TInstant TFamilyState::* FamilyToDeadline>
+ using TStateHeap = std::priority_queue<
+ TNameToState::iterator,
+ std::vector<TNameToState::iterator>,
+ THeapCompare<StateToFamily, FamilyToDeadline>
+ >;
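+
+ // Note (added for clarity): std::priority_queue is a max-heap, and
+ // THeapCompare inverts the comparison (true when b < a), so the entry
+ // with the earliest deadline ends up on top of the heap.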
+
+ struct TWaitingInfo {
+ TNameToState::iterator Position;
+ int Family;
+ };
+
+ private:
+ void EnsureRequest(TFamilyState& state, const TString& name, int family, TInstant now) {
+ if (state.Waiting) {
+ return; // request is already pending
+ }
+
+ if (!state.IsSoftExpired(now) && !state.IsHardExpired(now)) {
+ return; // response is not expired yet
+ }
+
+ if (MonCounters) {
+ switch (family) {
+ case AF_INET6:
+ ++*MonCounters->OutgoingInFlightV6;
+ ++*MonCounters->OutgoingTotalV6;
+ break;
+ case AF_INET:
+ ++*MonCounters->OutgoingInFlightV4;
+ ++*MonCounters->OutgoingTotalV4;
+ break;
+ }
+ }
+
+ ui64 reqId = ++LastRequestId;
+ auto& req = WaitingRequests[reqId];
+ req.Position = NameToState.find(name);
+ req.Family = family;
+ Y_VERIFY(req.Position != NameToState.end());
+
+ Send(Upstream, new TEvDns::TEvGetHostByName(name, family), IEventHandle::FlagTrackDelivery, reqId);
+ state.Waiting = true;
+ }
+
+ template<TFamilyState TState::* StateToFamily,
+ TInstant TFamilyState::* FamilyToDeadline,
+ TInstant TFamilyState::* FamilyToNextDeadline,
+ bool TFamilyState::* FamilyToFlag,
+ class THeap>
+ void PushToHeap(THeap& heap, TNameToState::iterator it, TInstant newDeadline) {
+ auto& family = it->second.*StateToFamily;
+ TInstant& deadline = family.*FamilyToDeadline;
+ TInstant& nextDeadline = family.*FamilyToNextDeadline;
+ bool& flag = family.*FamilyToFlag;
+ nextDeadline = newDeadline;
+ if (!flag) {
+ deadline = newDeadline;
+ heap.push(it);
+ flag = true;
+ }
+ }
+
+ void PushSoftV6(TNameToState::iterator it, TInstant newDeadline) {
+ PushToHeap<&TState::StateIPv6, &TFamilyState::SoftDeadline, &TFamilyState::NextSoftDeadline, &TFamilyState::InSoftHeap>(SoftHeapIPv6, it, newDeadline);
+ }
+
+ void PushHardV6(TNameToState::iterator it, TInstant newDeadline) {
+ PushToHeap<&TState::StateIPv6, &TFamilyState::HardDeadline, &TFamilyState::NextHardDeadline, &TFamilyState::InHardHeap>(HardHeapIPv6, it, newDeadline);
+ }
+
+ void PushSoftV4(TNameToState::iterator it, TInstant newDeadline) {
+ PushToHeap<&TState::StateIPv4, &TFamilyState::SoftDeadline, &TFamilyState::NextSoftDeadline, &TFamilyState::InSoftHeap>(SoftHeapIPv4, it, newDeadline);
+ }
+
+ void PushHardV4(TNameToState::iterator it, TInstant newDeadline) {
+ PushToHeap<&TState::StateIPv4, &TFamilyState::HardDeadline, &TFamilyState::NextHardDeadline, &TFamilyState::InHardHeap>(HardHeapIPv4, it, newDeadline);
+ }
+
+ void ProcessErrorV6(TNameToState::iterator it, int status, TString errorText) {
+ auto now = TActivationContext::Now();
+ if (MonCounters) {
+ --*MonCounters->OutgoingInFlightV6;
+ ++*MonCounters->OutgoingErrorsV6;
+ }
+
+ auto& state = it->second.StateIPv6;
+ Y_VERIFY(state.Waiting, "Got error for a state we are not waiting");
+ state.Waiting = false;
+
+ // When we have a cached server reply (positive or negative), don't overwrite it with spurious network errors
+ const bool serverReplied = TFamilyState::ServerReplied(status);
+ if (!serverReplied && state.ServerReplied() && !state.IsHardExpired(now)) {
+ PushSoftV6(it, now + Options.SoftNegativeExpireTime);
+ if (state.Status == ARES_SUCCESS) {
+ SendAddrsV6(it);
+ } else {
+ SendErrorsV6(it, now);
+ }
+ return;
+ }
+
+ state.Status = status;
+ state.ErrorText = std::move(errorText);
+ PushSoftV6(it, now + Options.SoftNegativeExpireTime);
+ if (serverReplied) {
+ // Server actually replied, so keep it cached for longer
+ PushHardV6(it, now + Options.HardPositiveExpireTime);
+ } else {
+ PushHardV6(it, now + Options.HardNegativeExpireTime);
+ }
+
+ SendErrorsV6(it, now);
+ }
+
+ void SendErrorsV6(TNameToState::iterator it, TInstant now) {
+ bool cleaned = false;
+ auto& state = it->second.StateIPv6;
+ while (state.WaitingRequests) {
+ THolder<TIncomingRequest> req(state.WaitingRequests.PopFront());
+ if (req->Family == AF_UNSPEC && Options.AllowIPv4 && state.RetryUnspec()) {
+ if (!cleaned) {
+ CleanupExpired(now);
+ cleaned = true;
+ }
+ EnqueueRequestIPv4(std::move(req), /* isCacheMiss */ true);
+ } else {
+ ReplyWithError(std::move(req), state.Status, state.ErrorText);
+ }
+ }
+ }
+
+ void ProcessErrorV4(TNameToState::iterator it, int status, TString errorText) {
+ auto now = TActivationContext::Now();
+ if (MonCounters) {
+ --*MonCounters->OutgoingInFlightV4;
+ ++*MonCounters->OutgoingErrorsV4;
+ }
+
+ auto& state = it->second.StateIPv4;
+ Y_VERIFY(state.Waiting, "Got error for a state we are not waiting");
+ state.Waiting = false;
+
+ // When we have a cached server reply (positive or negative), don't overwrite it with spurious network errors
+ const bool serverReplied = TFamilyState::ServerReplied(status);
+ if (!serverReplied && state.ServerReplied() && !state.IsHardExpired(now)) {
+ PushSoftV4(it, now + Options.SoftNegativeExpireTime);
+ if (state.Status == ARES_SUCCESS) {
+ SendAddrsV4(it);
+ } else {
+ SendErrorsV4(it);
+ }
+ return;
+ }
+
+ state.Status = status;
+ state.ErrorText = std::move(errorText);
+ PushSoftV4(it, now + Options.SoftNegativeExpireTime);
+ if (serverReplied) {
+ // Server actually replied, so keep it cached for longer
+ PushHardV4(it, now + Options.HardPositiveExpireTime);
+ } else {
+ PushHardV4(it, now + Options.HardNegativeExpireTime);
+ }
+
+ SendErrorsV4(it);
+ }
+
+ void SendErrorsV4(TNameToState::iterator it) {
+ auto& state = it->second.StateIPv4;
+ while (state.WaitingRequests) {
+ THolder<TIncomingRequest> req(state.WaitingRequests.PopFront());
+ ReplyWithError(std::move(req), state.Status, state.ErrorText);
+ }
+ }
+
+ void ProcessAddrsV6(TNameToState::iterator it, TVector<struct in6_addr> addrs) {
+ if (Y_UNLIKELY(addrs.empty())) {
+ // Shouldn't normally happen: treat an empty address list as ENODATA instead of caching an empty positive reply
+ return ProcessErrorV6(it, ARES_ENODATA, ares_strerror(ARES_ENODATA));
+ }
+
+ auto now = TActivationContext::Now();
+ if (MonCounters) {
+ --*MonCounters->OutgoingInFlightV6;
+ }
+
+ auto& state = it->second.StateIPv6;
+ Y_VERIFY(state.Waiting, "Got reply for a state we are not waiting");
+ state.Waiting = false;
+
+ state.Status = ARES_SUCCESS;
+ it->second.AddrsIPv6 = std::move(addrs);
+ PushSoftV6(it, now + Options.SoftPositiveExpireTime);
+ PushHardV6(it, now + Options.HardPositiveExpireTime);
+
+ SendAddrsV6(it);
+ }
+
+ void SendAddrsV6(TNameToState::iterator it) {
+ auto& state = it->second.StateIPv6;
+ while (state.WaitingRequests) {
+ THolder<TIncomingRequest> req(state.WaitingRequests.PopFront());
+ ReplyWithAddrs(std::move(req), it->second.AddrsIPv6);
+ }
+ }
+
+ void ProcessAddrsV4(TNameToState::iterator it, TVector<struct in_addr> addrs) {
+ if (Y_UNLIKELY(addrs.empty())) {
+ // Shouldn't normally happen: treat an empty address list as ENODATA instead of caching an empty positive reply
+ return ProcessErrorV4(it, ARES_ENODATA, ares_strerror(ARES_ENODATA));
+ }
+
+ auto now = TActivationContext::Now();
+ if (MonCounters) {
+ --*MonCounters->OutgoingInFlightV4;
+ }
+
+ auto& state = it->second.StateIPv4;
+ Y_VERIFY(state.Waiting, "Got reply for a state we are not waiting");
+ state.Waiting = false;
+
+ state.Status = ARES_SUCCESS;
+ it->second.AddrsIPv4 = std::move(addrs);
+ PushSoftV4(it, now + Options.SoftPositiveExpireTime);
+ PushHardV4(it, now + Options.HardPositiveExpireTime);
+
+ SendAddrsV4(it);
+ }
+
+ void SendAddrsV4(TNameToState::iterator it) {
+ auto& state = it->second.StateIPv4;
+ while (state.WaitingRequests) {
+ THolder<TIncomingRequest> req(state.WaitingRequests.PopFront());
+ ReplyWithAddrs(std::move(req), it->second.AddrsIPv4);
+ }
+ }
+
+ private:
+ template<TFamilyState TState::*StateToFamily,
+ TInstant TFamilyState::* FamilyToDeadline,
+ TInstant TFamilyState::* FamilyToNextDeadline,
+ bool TFamilyState::* FamilyToFlag>
+ void DoCleanupExpired(TStateHeap<StateToFamily, FamilyToDeadline>& heap, TInstant now) {
+ while (!heap.empty()) {
+ auto it = heap.top();
+ auto& family = it->second.*StateToFamily;
+ TInstant& deadline = family.*FamilyToDeadline;
+ if (now <= deadline) {
+ break;
+ }
+
+ bool& flag = family.*FamilyToFlag;
+ Y_VERIFY(flag);
+ heap.pop();
+ flag = false;
+
+ TInstant& nextDeadline = family.*FamilyToNextDeadline;
+ if (now < nextDeadline) {
+ deadline = nextDeadline;
+ heap.push(it);
+ flag = true;
+ continue;
+ }
+
+ // Drop entries that are no longer in any heap and have no request in flight
+ if (!it->second.Needed()) {
+ NameToState.erase(it);
+ if (MonCounters) {
+ *MonCounters->CacheSize = NameToState.size();
+ }
+ }
+ }
+ }
+
+ void CleanupExpired(TInstant now) {
+ DoCleanupExpired<&TState::StateIPv6, &TFamilyState::SoftDeadline, &TFamilyState::NextSoftDeadline, &TFamilyState::InSoftHeap>(SoftHeapIPv6, now);
+ DoCleanupExpired<&TState::StateIPv6, &TFamilyState::HardDeadline, &TFamilyState::NextHardDeadline, &TFamilyState::InHardHeap>(HardHeapIPv6, now);
+ DoCleanupExpired<&TState::StateIPv4, &TFamilyState::SoftDeadline, &TFamilyState::NextSoftDeadline, &TFamilyState::InSoftHeap>(SoftHeapIPv4, now);
+ DoCleanupExpired<&TState::StateIPv4, &TFamilyState::HardDeadline, &TFamilyState::NextHardDeadline, &TFamilyState::InHardHeap>(HardHeapIPv4, now);
+ }
+
+ template<class TEvent>
+ void SendError(TActorId replyTo, ui64 cookie, int status, const TString& errorText) {
+ auto reply = MakeHolder<TEvent>();
+ reply->Status = status;
+ reply->ErrorText = errorText;
+ this->Send(replyTo, reply.Release(), 0, cookie);
+ }
+
+ void ReplyWithError(THolder<TIncomingRequest> req, int status, const TString& errorText) {
+ if (MonCounters) {
+ ++*MonCounters->IncomingErrors;
+ }
+ switch (req->Type) {
+ case EIncomingRequestType::GetHostByName: {
+ SendError<TEvDns::TEvGetHostByNameResult>(req->Sender, req->Cookie, status, errorText);
+ break;
+ }
+ case EIncomingRequestType::GetAddr: {
+ SendError<TEvDns::TEvGetAddrResult>(req->Sender, req->Cookie, status, errorText);
+ break;
+ }
+ }
+ }
+
+ void ReplyWithAddrs(THolder<TIncomingRequest> req, const TVector<struct in6_addr>& addrs) {
+ switch (req->Type) {
+ case EIncomingRequestType::GetHostByName: {
+ auto reply = MakeHolder<TEvDns::TEvGetHostByNameResult>();
+ reply->AddrsV6 = addrs;
+ Send(req->Sender, reply.Release(), 0, req->Cookie);
+ break;
+ }
+ case EIncomingRequestType::GetAddr: {
+ Y_VERIFY(!addrs.empty());
+ auto reply = MakeHolder<TEvDns::TEvGetAddrResult>();
+ reply->Addr = addrs.front();
+ Send(req->Sender, reply.Release(), 0, req->Cookie);
+ break;
+ }
+ }
+ }
+
+ void ReplyWithAddrs(THolder<TIncomingRequest> req, const TVector<struct in_addr>& addrs) {
+ switch (req->Type) {
+ case EIncomingRequestType::GetHostByName: {
+ auto reply = MakeHolder<TEvDns::TEvGetHostByNameResult>();
+ reply->AddrsV4 = addrs;
+ Send(req->Sender, reply.Release(), 0, req->Cookie);
+ break;
+ }
+ case EIncomingRequestType::GetAddr: {
+ Y_VERIFY(!addrs.empty());
+ auto reply = MakeHolder<TEvDns::TEvGetAddrResult>();
+ reply->Addr = addrs.front();
+ Send(req->Sender, reply.Release(), 0, req->Cookie);
+ break;
+ }
+ }
+ }
+
+ void ReplyWithError(THolder<TIncomingRequest> req, int status) {
+ ReplyWithError(std::move(req), status, ares_strerror(status));
+ }
+
+ void DropPending(TIncomingRequestList& list, int status, const TString& errorText) {
+ while (list) {
+ THolder<TIncomingRequest> req(list.PopFront());
+ ReplyWithError(std::move(req), status, errorText);
+ }
+ }
+
+ void DropPending(int status, const TString& errorText) {
+ for (auto& [name, state] : NameToState) {
+ DropPending(state.StateIPv6.WaitingRequests, status, errorText);
+ DropPending(state.StateIPv4.WaitingRequests, status, errorText);
+ }
+ }
+
+ void DropPending(int status) {
+ DropPending(status, ares_strerror(status));
+ }
+
+ private:
+ const TActorId Upstream;
+ const TCachingDnsResolverOptions Options;
+ const THolder<TMonCounters> MonCounters;
+
+ TNameToState NameToState;
+ TStateHeap<&TState::StateIPv6, &TFamilyState::SoftDeadline> SoftHeapIPv6;
+ TStateHeap<&TState::StateIPv6, &TFamilyState::HardDeadline> HardHeapIPv6;
+ TStateHeap<&TState::StateIPv4, &TFamilyState::SoftDeadline> SoftHeapIPv4;
+ TStateHeap<&TState::StateIPv4, &TFamilyState::HardDeadline> HardHeapIPv4;
+
+ THashMap<ui64, TWaitingInfo> WaitingRequests;
+ ui64 LastRequestId = 0;
+ };
+
+ IActor* CreateCachingDnsResolver(TActorId upstream, TCachingDnsResolverOptions options) {
+ return new TCachingDnsResolver(upstream, std::move(options));
+ }
+
+} // namespace NDnsResolver
+} // namespace NActors
diff --git a/library/cpp/actors/dnsresolver/dnsresolver_caching_ut.cpp b/library/cpp/actors/dnsresolver/dnsresolver_caching_ut.cpp
new file mode 100644
index 0000000000..c3b7cb3c77
--- /dev/null
+++ b/library/cpp/actors/dnsresolver/dnsresolver_caching_ut.cpp
@@ -0,0 +1,630 @@
+#include "dnsresolver.h"
+
+#include <library/cpp/actors/core/hfunc.h>
+#include <library/cpp/actors/testlib/test_runtime.h>
+#include <library/cpp/testing/unittest/registar.h>
+#include <util/string/builder.h>
+
+#include <ares.h>
+
+using namespace NActors;
+using namespace NActors::NDnsResolver;
+
+// Uses a mock resolver, so these tests don't depend on real DNS
+Y_UNIT_TEST_SUITE(CachingDnsResolver) {
+
+ struct TAddrToString {
+ TString operator()(const std::monostate&) const {
+ return "<nothing>";
+ }
+
+ TString operator()(const struct in6_addr& addr) const {
+ char dst[INET6_ADDRSTRLEN];
+ auto res = ares_inet_ntop(AF_INET6, &addr, dst, INET6_ADDRSTRLEN);
+ Y_VERIFY(res, "Cannot convert ipv6 address");
+ return dst;
+ }
+
+ TString operator()(const struct in_addr& addr) const {
+ char dst[INET_ADDRSTRLEN];
+ auto res = ares_inet_ntop(AF_INET, &addr, dst, INET_ADDRSTRLEN);
+ Y_VERIFY(res, "Cannot convert ipv4 address");
+ return dst;
+ }
+ };
+
+ TString AddrToString(const std::variant<std::monostate, struct in6_addr, struct in_addr>& v) {
+ return std::visit(TAddrToString(), v);
+ }
+
+ struct TMockReply {
+ static constexpr TDuration DefaultDelay = TDuration::MilliSeconds(1);
+
+ int Status = 0;
+ TDuration Delay;
+ TVector<struct in6_addr> AddrsV6;
+ TVector<struct in_addr> AddrsV4;
+
+ static TMockReply Error(int status, TDuration delay = DefaultDelay) {
+ Y_VERIFY(status != 0);
+ TMockReply reply;
+ reply.Status = status;
+ reply.Delay = delay;
+ return reply;
+ }
+
+ static TMockReply Empty(TDuration delay = DefaultDelay) {
+ TMockReply reply;
+ reply.Delay = delay;
+ return reply;
+ }
+
+ static TMockReply ManyV6(const TVector<TString>& addrs, TDuration delay = DefaultDelay) {
+ TMockReply reply;
+ reply.Delay = delay;
+ for (const TString& addr : addrs) {
+ void* dst = &reply.AddrsV6.emplace_back();
+ int status = ares_inet_pton(AF_INET6, addr.c_str(), dst);
+ Y_VERIFY(status == 1, "Invalid ipv6 address: %s", addr.c_str());
+ }
+ return reply;
+ }
+
+ static TMockReply ManyV4(const TVector<TString>& addrs, TDuration delay = DefaultDelay) {
+ TMockReply reply;
+ reply.Delay = delay;
+ for (const TString& addr : addrs) {
+ void* dst = &reply.AddrsV4.emplace_back();
+ int status = ares_inet_pton(AF_INET, addr.c_str(), dst);
+ Y_VERIFY(status == 1, "Invalid ipv4 address: %s", addr.c_str());
+ }
+ return reply;
+ }
+
+ static TMockReply SingleV6(const TString& addr, TDuration delay = DefaultDelay) {
+ return ManyV6({ addr }, delay);
+ }
+
+ static TMockReply SingleV4(const TString& addr, TDuration delay = DefaultDelay) {
+ return ManyV4({ addr }, delay);
+ }
+ };
+
+ using TMockDnsCallback = std::function<TMockReply (const TString&, int)>;
+
+ class TMockDnsResolver : public TActor<TMockDnsResolver> {
+ public:
+ TMockDnsResolver(TMockDnsCallback callback)
+ : TActor(&TThis::StateWork)
+ , Callback(std::move(callback))
+ { }
+
+ private:
+ struct TEvPrivate {
+ enum EEv {
+ EvScheduled = EventSpaceBegin(TEvents::ES_PRIVATE),
+ };
+
+ struct TEvScheduled : public TEventLocal<TEvScheduled, EvScheduled> {
+ TActorId Sender;
+ ui64 Cookie;
+ TMockReply Reply;
+
+ TEvScheduled(TActorId sender, ui64 cookie, TMockReply reply)
+ : Sender(sender)
+ , Cookie(cookie)
+ , Reply(std::move(reply))
+ { }
+ };
+ };
+
+ private:
+ STRICT_STFUNC(StateWork, {
+ hFunc(TEvents::TEvPoison, Handle);
+ hFunc(TEvDns::TEvGetHostByName, Handle);
+ hFunc(TEvPrivate::TEvScheduled, Handle);
+ });
+
+ void Handle(TEvents::TEvPoison::TPtr&) {
+ PassAway();
+ }
+
+ void Handle(TEvDns::TEvGetHostByName::TPtr& ev) {
+ auto reply = Callback(ev->Get()->Name, ev->Get()->Family);
+ if (reply.Delay) {
+ Schedule(reply.Delay, new TEvPrivate::TEvScheduled(ev->Sender, ev->Cookie, std::move(reply)));
+ } else {
+ SendReply(ev->Sender, ev->Cookie, std::move(reply));
+ }
+ }
+
+ void Handle(TEvPrivate::TEvScheduled::TPtr& ev) {
+ SendReply(ev->Get()->Sender, ev->Get()->Cookie, std::move(ev->Get()->Reply));
+ }
+
+ private:
+ void SendReply(const TActorId& sender, ui64 cookie, TMockReply&& reply) {
+ auto res = MakeHolder<TEvDns::TEvGetHostByNameResult>();
+ res->Status = reply.Status;
+ if (res->Status != 0) {
+ res->ErrorText = ares_strerror(res->Status);
+ } else {
+ res->AddrsV6 = std::move(reply.AddrsV6);
+ res->AddrsV4 = std::move(reply.AddrsV4);
+ }
+ Send(sender, res.Release(), 0, cookie);
+ }
+
+ private:
+ TMockDnsCallback Callback;
+ };
+
+ struct TCachingDnsRuntime : public TTestActorRuntimeBase {
+ TCachingDnsResolverOptions ResolverOptions;
+ TActorId MockResolver;
+ TActorId Resolver;
+ TActorId Sleeper;
+ TString Section_;
+
+ NMonitoring::TDynamicCounters::TCounterPtr InFlight6;
+ NMonitoring::TDynamicCounters::TCounterPtr InFlight4;
+ NMonitoring::TDynamicCounters::TCounterPtr Total6;
+ NMonitoring::TDynamicCounters::TCounterPtr Total4;
+ NMonitoring::TDynamicCounters::TCounterPtr Misses;
+ NMonitoring::TDynamicCounters::TCounterPtr Hits;
+
+ THashMap<TString, TMockReply> ReplyV6;
+ THashMap<TString, TMockReply> ReplyV4;
+
+ TCachingDnsRuntime() {
+ SetScheduledEventFilter([](auto&&, auto&&, auto&&, auto&&) { return false; });
+ ResolverOptions.MonCounters = new NMonitoring::TDynamicCounters;
+
+ ReplyV6["localhost"] = TMockReply::SingleV6("::1");
+ ReplyV4["localhost"] = TMockReply::SingleV4("127.0.0.1");
+ ReplyV6["yandex.ru"] = TMockReply::SingleV6("2a02:6b8:a::a", TDuration::MilliSeconds(500));
+ ReplyV4["yandex.ru"] = TMockReply::SingleV4("77.88.55.77", TDuration::MilliSeconds(250));
+ ReplyV6["router.asus.com"] = TMockReply::Error(ARES_ENODATA);
+ ReplyV4["router.asus.com"] = TMockReply::SingleV4("192.168.0.1");
+ }
+
+ void Start(TMockDnsCallback callback) {
+ MockResolver = Register(new TMockDnsResolver(std::move(callback)));
+ EnableScheduleForActor(MockResolver);
+ Resolver = Register(CreateCachingDnsResolver(MockResolver, ResolverOptions));
+ Sleeper = AllocateEdgeActor();
+
+ InFlight6 = ResolverOptions.MonCounters->GetCounter("DnsResolver/Outgoing/InFlight/V6", false);
+ InFlight4 = ResolverOptions.MonCounters->GetCounter("DnsResolver/Outgoing/InFlight/V4", false);
+ Total6 = ResolverOptions.MonCounters->GetCounter("DnsResolver/Outgoing/Total/V6", true);
+ Total4 = ResolverOptions.MonCounters->GetCounter("DnsResolver/Outgoing/Total/V4", true);
+ Misses = ResolverOptions.MonCounters->GetCounter("DnsResolver/Cache/Misses", true);
+ Hits = ResolverOptions.MonCounters->GetCounter("DnsResolver/Cache/Hits", true);
+ }
+
+ void Start() {
+ Start([this](const TString& name, int family) {
+ switch (family) {
+ case AF_INET6: {
+ auto it = ReplyV6.find(name);
+ if (it != ReplyV6.end()) {
+ return it->second;
+ }
+ break;
+ }
+ case AF_INET: {
+ auto it = ReplyV4.find(name);
+ if (it != ReplyV4.end()) {
+ return it->second;
+ }
+ break;
+ }
+ }
+ return TMockReply::Error(ARES_ENOTFOUND);
+ });
+ }
+
+ void Section(const TString& section) {
+ Section_ = section;
+ }
+
+ void Sleep(TDuration duration) {
+ Schedule(new IEventHandle(Sleeper, Sleeper, new TEvents::TEvWakeup), duration);
+ GrabEdgeEventRethrow<TEvents::TEvWakeup>(Sleeper);
+ }
+
+ void WaitNoInFlight() {
+ if (*InFlight6 || *InFlight4) {
+ TDispatchOptions options;
+ options.CustomFinalCondition = [&]() {
+ return !*InFlight6 && !*InFlight4;
+ };
+ DispatchEvents(options);
+ UNIT_ASSERT_C(!*InFlight6 && !*InFlight4, "Failed to wait for no inflight in " << Section_);
+ }
+ }
+
+ void SendGetHostByName(const TActorId& sender, const TString& name, int family = AF_UNSPEC) {
+ Send(new IEventHandle(Resolver, sender, new TEvDns::TEvGetHostByName(name, family)), 0, true);
+ }
+
+ void SendGetAddr(const TActorId& sender, const TString& name, int family = AF_UNSPEC) {
+ Send(new IEventHandle(Resolver, sender, new TEvDns::TEvGetAddr(name, family)), 0, true);
+ }
+
+ TEvDns::TEvGetHostByNameResult::TPtr WaitGetHostByName(const TActorId& sender) {
+ return GrabEdgeEventRethrow<TEvDns::TEvGetHostByNameResult>(sender);
+ }
+
+ TEvDns::TEvGetAddrResult::TPtr WaitGetAddr(const TActorId& sender) {
+ return GrabEdgeEventRethrow<TEvDns::TEvGetAddrResult>(sender);
+ }
+
+ void ExpectInFlight6(i64 count) {
+ UNIT_ASSERT_VALUES_EQUAL_C(InFlight6->Val(), count, Section_);
+ }
+
+ void ExpectInFlight4(i64 count) {
+ UNIT_ASSERT_VALUES_EQUAL_C(InFlight4->Val(), count, Section_);
+ }
+
+ void ExpectTotal6(i64 count) {
+ UNIT_ASSERT_VALUES_EQUAL_C(Total6->Val(), count, Section_);
+ }
+
+ void ExpectTotal4(i64 count) {
+ UNIT_ASSERT_VALUES_EQUAL_C(Total4->Val(), count, Section_);
+ }
+
+ void Expect6(i64 total, i64 inflight) {
+ UNIT_ASSERT_C(
+ Total6->Val() == total && InFlight6->Val() == inflight,
+ Section_ << ": Expect6(" << total << ", " << inflight << ") "
+ << " but got (" << Total6->Val() << ", " << InFlight6->Val() << ")");
+ }
+
+ void Expect4(i64 total, i64 inflight) {
+ UNIT_ASSERT_C(
+ Total4->Val() == total && InFlight4->Val() == inflight,
+ Section_ << ": Expect4(" << total << ", " << inflight << ") "
+ << " got (" << Total4->Val() << ", " << InFlight4->Val() << ")");
+ }
+
+ void ExpectMisses(i64 count) {
+ UNIT_ASSERT_VALUES_EQUAL_C(Misses->Val(), count, Section_);
+ }
+
+ void ExpectHits(i64 count) {
+ UNIT_ASSERT_VALUES_EQUAL_C(Hits->Val(), count, Section_);
+ }
+
+ void ExpectGetHostByNameError(const TActorId& sender, int status) {
+ auto ev = WaitGetHostByName(sender);
+ UNIT_ASSERT_VALUES_EQUAL_C(ev->Get()->Status, status, Section_ << ": " << ev->Get()->ErrorText);
+ }
+
+ void ExpectGetAddrError(const TActorId& sender, int status) {
+ auto ev = WaitGetAddr(sender);
+ UNIT_ASSERT_VALUES_EQUAL_C(ev->Get()->Status, status, Section_ << ": " << ev->Get()->ErrorText);
+ }
+
+ void ExpectGetHostByNameSuccess(const TActorId& sender, const TString& expected) {
+ auto ev = WaitGetHostByName(sender);
+ UNIT_ASSERT_VALUES_EQUAL_C(ev->Get()->Status, 0, Section_ << ": " << ev->Get()->ErrorText);
+ TStringBuilder result;
+ for (const auto& addr : ev->Get()->AddrsV6) {
+ if (result) {
+ result << ',';
+ }
+ result << TAddrToString()(addr);
+ }
+ for (const auto& addr : ev->Get()->AddrsV4) {
+ if (result) {
+ result << ',';
+ }
+ result << TAddrToString()(addr);
+ }
+ UNIT_ASSERT_VALUES_EQUAL_C(TString(result), expected, Section_);
+ }
+
+ void ExpectGetAddrSuccess(const TActorId& sender, const TString& expected) {
+ auto ev = WaitGetAddr(sender);
+ UNIT_ASSERT_VALUES_EQUAL_C(ev->Get()->Status, 0, Section_ << ": " << ev->Get()->ErrorText);
+ TString result = AddrToString(ev->Get()->Addr);
+ UNIT_ASSERT_VALUES_EQUAL_C(result, expected, Section_);
+ }
+ };
+
+ Y_UNIT_TEST(UnusableResolver) {
+ TCachingDnsRuntime runtime;
+ runtime.Initialize();
+ runtime.Start();
+
+ auto sender = runtime.AllocateEdgeActor();
+
+ runtime.SendGetAddr(sender, "yandex.ru", AF_UNSPEC);
+ runtime.ExpectGetAddrSuccess(sender, "2a02:6b8:a::a");
+
+ runtime.Send(new IEventHandle(runtime.MockResolver, { }, new TEvents::TEvPoison), 0, true);
+ runtime.SendGetAddr(sender, "foo.ru", AF_UNSPEC);
+ runtime.ExpectGetAddrError(sender, ARES_ENOTINITIALIZED);
+ }
+
+ Y_UNIT_TEST(ResolveCaching) {
+ TCachingDnsRuntime runtime;
+ runtime.Initialize();
+ runtime.Start();
+
+ auto sender = runtime.AllocateEdgeActor();
+
+ // First time resolve, ipv4 and ipv6 sent in parallel, we wait for ipv6 result
+ runtime.Section("First time resolve");
+ runtime.SendGetAddr(sender, "yandex.ru", AF_UNSPEC);
+ runtime.ExpectGetAddrSuccess(sender, "2a02:6b8:a::a");
+ runtime.Expect6(1, 0);
+ runtime.Expect4(1, 0);
+ runtime.ExpectMisses(1);
+ runtime.ExpectHits(0);
+
+ // Second resolve, ipv6 and ipv4 queries result in a cache hit
+ runtime.Section("Second resolve, ipv6");
+ runtime.SendGetAddr(sender, "yandex.ru", AF_INET6);
+ runtime.ExpectGetAddrSuccess(sender, "2a02:6b8:a::a");
+ runtime.Expect6(1, 0);
+ runtime.ExpectHits(1);
+ runtime.Section("Second resolve, ipv4");
+ runtime.SendGetAddr(sender, "yandex.ru", AF_INET);
+ runtime.ExpectGetAddrSuccess(sender, "77.88.55.77");
+ runtime.Expect4(1, 0);
+ runtime.ExpectHits(2);
+
+ // Wait until soft expiration and try ipv4 again
+ // Will cause a cache hit, but will start a new ipv4 request in the background
+ runtime.Section("Retry ipv4 after soft expiration");
+ runtime.Sleep(TDuration::Seconds(15));
+ runtime.SendGetAddr(sender, "yandex.ru", AF_INET);
+ runtime.ExpectGetAddrSuccess(sender, "77.88.55.77");
+ runtime.Expect6(1, 0);
+ runtime.Expect4(2, 1);
+ runtime.ExpectMisses(1);
+ runtime.ExpectHits(3);
+ runtime.WaitNoInFlight();
+
+ // Wait until soft expiration and try both again
+ // Will cause a cache hit, but will start a new ipv6 request in the background
+ runtime.Section("Retry both after soft expiration");
+ runtime.Sleep(TDuration::Seconds(15));
+ runtime.SendGetAddr(sender, "yandex.ru", AF_UNSPEC);
+ runtime.ExpectGetAddrSuccess(sender, "2a02:6b8:a::a");
+ runtime.Expect6(2, 1);
+ runtime.Expect4(2, 0);
+ runtime.ExpectMisses(1);
+ runtime.ExpectHits(4);
+ runtime.WaitNoInFlight();
+
+ // Wait until hard expiration and try both again
+ // Will cause a cache miss and new resolve requests
+ runtime.Section("Retry both after hard expiration");
+ runtime.Sleep(TDuration::Hours(2));
+ runtime.SendGetAddr(sender, "yandex.ru", AF_UNSPEC);
+ runtime.ExpectGetAddrSuccess(sender, "2a02:6b8:a::a");
+ runtime.Expect6(3, 0);
+ runtime.Expect4(3, 0);
+ runtime.ExpectMisses(2);
+ runtime.ExpectHits(4);
+
+ // Wait half the hard expiration time, must always result in a cache hit
+ runtime.Section("Retry both after half hard expiration");
+ for (ui64 i = 1; i <= 4; ++i) {
+ runtime.Sleep(TDuration::Hours(1));
+ runtime.SendGetAddr(sender, "yandex.ru", AF_UNSPEC);
+ runtime.ExpectGetAddrSuccess(sender, "2a02:6b8:a::a");
+ runtime.Expect6(3 + i, 1);
+ runtime.ExpectHits(4 + i);
+ runtime.WaitNoInFlight();
+ }
+
+ // Change v6 result to a timeout, must keep using cached result until hard expiration
+ runtime.Section("Dns keeps timing out");
+ runtime.ReplyV6["yandex.ru"] = TMockReply::Error(ARES_ETIMEOUT);
+ for (ui64 i = 1; i <= 4; ++i) {
+ runtime.Sleep(TDuration::Seconds(15));
+ runtime.SendGetAddr(sender, "yandex.ru", AF_UNSPEC);
+ runtime.ExpectGetAddrSuccess(sender, "2a02:6b8:a::a");
+ runtime.Expect6(7 + i, 1);
+ runtime.ExpectHits(8 + i);
+ runtime.WaitNoInFlight();
+ }
+
+ // Change v6 result to nodata, must switch to a v4 result eventually
+ runtime.Section("Host changes to being ipv4 only");
+ runtime.ReplyV6["yandex.ru"] = TMockReply::Error(ARES_ENODATA);
+ runtime.Sleep(TDuration::Seconds(2));
+ runtime.SendGetAddr(sender, "yandex.ru", AF_UNSPEC);
+ runtime.ExpectGetAddrSuccess(sender, "2a02:6b8:a::a");
+ runtime.WaitNoInFlight();
+ runtime.SendGetAddr(sender, "yandex.ru", AF_UNSPEC);
+ runtime.ExpectGetAddrSuccess(sender, "77.88.55.77");
+ runtime.Expect6(12, 0);
+ runtime.Expect4(4, 0);
+ runtime.ExpectMisses(3);
+
+ // Change v6 result to nxdomain, must not fall back to a v4 result
+ runtime.Section("Host is removed from dns");
+ runtime.ReplyV6["yandex.ru"] = TMockReply::Error(ARES_ENOTFOUND);
+ runtime.Sleep(TDuration::Seconds(15));
+ runtime.SendGetAddr(sender, "yandex.ru", AF_UNSPEC);
+ runtime.ExpectGetAddrSuccess(sender, "77.88.55.77");
+ runtime.WaitNoInFlight();
+ runtime.SendGetAddr(sender, "yandex.ru", AF_UNSPEC);
+ runtime.ExpectGetAddrError(sender, ARES_ENOTFOUND);
+ }
+
+ Y_UNIT_TEST(ResolveCachingV4) {
+ TCachingDnsRuntime runtime;
+ runtime.Initialize();
+ runtime.Start();
+
+ auto sender = runtime.AllocateEdgeActor();
+
+ runtime.Section("First request");
+ runtime.SendGetAddr(sender, "router.asus.com", AF_UNSPEC);
+ runtime.ExpectGetAddrSuccess(sender, "192.168.0.1");
+ runtime.ExpectMisses(1);
+
+ runtime.Section("Second request");
+ runtime.SendGetAddr(sender, "router.asus.com", AF_UNSPEC);
+ runtime.ExpectGetAddrSuccess(sender, "192.168.0.1");
+ runtime.ExpectHits(1);
+
+ runtime.Section("Dns keeps timing out");
+ runtime.ReplyV6["router.asus.com"] = TMockReply::Error(ARES_ETIMEOUT);
+ runtime.ReplyV4["router.asus.com"] = TMockReply::Error(ARES_ETIMEOUT);
+ for (ui64 i = 1; i <= 4; ++i) {
+ runtime.Sleep(TDuration::Seconds(15));
+ runtime.SendGetAddr(sender, "router.asus.com", AF_UNSPEC);
+ runtime.ExpectGetAddrSuccess(sender, "192.168.0.1");
+ runtime.Expect6(1 + i, 1);
+ runtime.Expect4(1 + i, 1);
+ runtime.ExpectHits(1 + i);
+ runtime.WaitNoInFlight();
+ }
+
+ runtime.Section("Host is removed from ipv4 dns");
+ runtime.ReplyV4["router.asus.com"] = TMockReply::Error(ARES_ENOTFOUND);
+ runtime.Sleep(TDuration::Seconds(15));
+ runtime.SendGetAddr(sender, "router.asus.com", AF_UNSPEC);
+ runtime.ExpectGetAddrSuccess(sender, "192.168.0.1");
+ runtime.WaitNoInFlight();
+ runtime.SendGetAddr(sender, "router.asus.com", AF_UNSPEC);
+ runtime.ExpectGetAddrError(sender, ARES_ENOTFOUND);
+ }
+
+ Y_UNIT_TEST(EventualTimeout) {
+ TCachingDnsRuntime runtime;
+ runtime.Initialize();
+ runtime.Start();
+
+ auto sender = runtime.AllocateEdgeActor();
+
+ runtime.ReplyV6["notfound.ru"] = TMockReply::Error(ARES_ENODATA);
+ runtime.ReplyV4["notfound.ru"] = TMockReply::Error(ARES_ENOTFOUND);
+ runtime.SendGetAddr(sender, "notfound.ru", AF_UNSPEC);
+ runtime.ExpectGetAddrError(sender, ARES_ENOTFOUND);
+
+ runtime.ReplyV4["notfound.ru"] = TMockReply::Error(ARES_ETIMEOUT);
+ runtime.SendGetAddr(sender, "notfound.ru", AF_UNSPEC);
+ runtime.ExpectGetAddrError(sender, ARES_ENOTFOUND);
+ runtime.WaitNoInFlight();
+
+ bool timeout = false;
+ for (ui64 i = 1; i <= 8; ++i) {
+ runtime.Sleep(TDuration::Minutes(30));
+ runtime.SendGetAddr(sender, "notfound.ru", AF_UNSPEC);
+ auto ev = runtime.WaitGetAddr(sender);
+ if (ev->Get()->Status == ARES_ETIMEOUT && i > 2) {
+ timeout = true;
+ break;
+ }
+ UNIT_ASSERT_VALUES_EQUAL_C(ev->Get()->Status, ARES_ENOTFOUND,
+ "Iteration " << i << ": " << ev->Get()->ErrorText);
+ }
+
+ UNIT_ASSERT_C(timeout, "DnsResolver did not reply with a timeout");
+ }
+
+ Y_UNIT_TEST(MultipleRequestsAndHosts) {
+ TCachingDnsRuntime runtime;
+ runtime.Initialize();
+ runtime.Start();
+
+ auto sender = runtime.AllocateEdgeActor();
+
+ runtime.SendGetHostByName(sender, "router.asus.com", AF_UNSPEC);
+ runtime.SendGetAddr(sender, "router.asus.com", AF_UNSPEC);
+ runtime.SendGetHostByName(sender, "yandex.ru", AF_UNSPEC);
+ runtime.SendGetAddr(sender, "yandex.ru", AF_UNSPEC);
+ runtime.ExpectGetHostByNameSuccess(sender, "192.168.0.1");
+ runtime.ExpectGetAddrSuccess(sender, "192.168.0.1");
+ runtime.ExpectGetHostByNameSuccess(sender, "2a02:6b8:a::a");
+ runtime.ExpectGetAddrSuccess(sender, "2a02:6b8:a::a");
+
+ runtime.SendGetHostByName(sender, "notfound.ru", AF_UNSPEC);
+ runtime.SendGetAddr(sender, "notfound.ru", AF_UNSPEC);
+ runtime.ExpectGetHostByNameError(sender, ARES_ENOTFOUND);
+ runtime.ExpectGetAddrError(sender, ARES_ENOTFOUND);
+ }
+
+ Y_UNIT_TEST(DisabledIPv6) {
+ TCachingDnsRuntime runtime;
+ runtime.ResolverOptions.AllowIPv6 = false;
+ runtime.Initialize();
+ runtime.Start();
+
+ auto sender = runtime.AllocateEdgeActor();
+
+ runtime.SendGetHostByName(sender, "yandex.ru", AF_UNSPEC);
+ runtime.SendGetAddr(sender, "yandex.ru", AF_UNSPEC);
+ runtime.ExpectGetHostByNameSuccess(sender, "77.88.55.77");
+ runtime.ExpectGetAddrSuccess(sender, "77.88.55.77");
+
+ runtime.SendGetHostByName(sender, "yandex.ru", AF_INET6);
+ runtime.SendGetAddr(sender, "yandex.ru", AF_INET6);
+ runtime.ExpectGetHostByNameError(sender, ARES_EBADFAMILY);
+ runtime.ExpectGetAddrError(sender, ARES_EBADFAMILY);
+
+ runtime.SendGetHostByName(sender, "yandex.ru", AF_UNSPEC);
+ runtime.SendGetAddr(sender, "yandex.ru", AF_UNSPEC);
+ runtime.ExpectGetHostByNameSuccess(sender, "77.88.55.77");
+ runtime.ExpectGetAddrSuccess(sender, "77.88.55.77");
+
+ runtime.SendGetHostByName(sender, "notfound.ru", AF_UNSPEC);
+ runtime.SendGetAddr(sender, "notfound.ru", AF_UNSPEC);
+ runtime.ExpectGetHostByNameError(sender, ARES_ENOTFOUND);
+ runtime.ExpectGetAddrError(sender, ARES_ENOTFOUND);
+ }
+
+ Y_UNIT_TEST(DisabledIPv4) {
+ TCachingDnsRuntime runtime;
+ runtime.ResolverOptions.AllowIPv4 = false;
+ runtime.Initialize();
+ runtime.Start();
+
+ auto sender = runtime.AllocateEdgeActor();
+
+ runtime.SendGetHostByName(sender, "router.asus.com", AF_UNSPEC);
+ runtime.SendGetAddr(sender, "router.asus.com", AF_UNSPEC);
+ runtime.ExpectGetHostByNameError(sender, ARES_ENODATA);
+ runtime.ExpectGetAddrError(sender, ARES_ENODATA);
+
+ runtime.SendGetHostByName(sender, "router.asus.com", AF_INET);
+ runtime.SendGetAddr(sender, "router.asus.com", AF_INET);
+ runtime.ExpectGetHostByNameError(sender, ARES_EBADFAMILY);
+ runtime.ExpectGetAddrError(sender, ARES_EBADFAMILY);
+
+ runtime.SendGetHostByName(sender, "router.asus.com", AF_UNSPEC);
+ runtime.SendGetAddr(sender, "router.asus.com", AF_UNSPEC);
+ runtime.ExpectGetHostByNameError(sender, ARES_ENODATA);
+ runtime.ExpectGetAddrError(sender, ARES_ENODATA);
+
+ runtime.SendGetHostByName(sender, "notfound.ru", AF_UNSPEC);
+ runtime.SendGetAddr(sender, "notfound.ru", AF_UNSPEC);
+ runtime.ExpectGetHostByNameError(sender, ARES_ENOTFOUND);
+ runtime.ExpectGetAddrError(sender, ARES_ENOTFOUND);
+ }
+
+ Y_UNIT_TEST(PoisonPill) {
+ TCachingDnsRuntime runtime;
+ runtime.Initialize();
+ runtime.Start();
+
+ auto sender = runtime.AllocateEdgeActor();
+
+ runtime.SendGetHostByName(sender, "yandex.ru", AF_UNSPEC);
+ runtime.SendGetAddr(sender, "yandex.ru", AF_UNSPEC);
+ runtime.Send(new IEventHandle(runtime.Resolver, sender, new TEvents::TEvPoison), 0, true);
+ runtime.ExpectGetHostByNameError(sender, ARES_ECANCELLED);
+ runtime.ExpectGetAddrError(sender, ARES_ECANCELLED);
+ }
+
+}
diff --git a/library/cpp/actors/dnsresolver/dnsresolver_ondemand.cpp b/library/cpp/actors/dnsresolver/dnsresolver_ondemand.cpp
new file mode 100644
index 0000000000..2025162e95
--- /dev/null
+++ b/library/cpp/actors/dnsresolver/dnsresolver_ondemand.cpp
@@ -0,0 +1,64 @@
+#include "dnsresolver.h"
+
+#include <library/cpp/actors/core/hfunc.h>
+
+namespace NActors {
+namespace NDnsResolver {
+
+ class TOnDemandDnsResolver : public TActor<TOnDemandDnsResolver> {
+ public:
+ TOnDemandDnsResolver(TOnDemandDnsResolverOptions options)
+ : TActor(&TThis::StateWork)
+ , Options(std::move(options))
+ { }
+
+ static constexpr EActivityType ActorActivityType() {
+ return DNS_RESOLVER;
+ }
+
+ private:
+ STRICT_STFUNC(StateWork, {
+ cFunc(TEvents::TEvPoison::EventType, PassAway);
+ fFunc(TEvDns::TEvGetHostByName::EventType, Forward);
+ fFunc(TEvDns::TEvGetAddr::EventType, Forward);
+ });
+
+ void Forward(STATEFN_SIG) {
+ ev->Rewrite(ev->GetTypeRewrite(), GetUpstream());
+ TActivationContext::Send(std::move(ev));
+ }
+
+ private:
+ TActorId GetUpstream() {
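+ // Lazily builds the resolver chain on first use: a simple resolver with
+ // a caching resolver on top, both registered in this actor's mailbox.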
+ if (Y_UNLIKELY(!CachingResolverId)) {
+ if (Y_LIKELY(!SimpleResolverId)) {
+ SimpleResolverId = RegisterWithSameMailbox(CreateSimpleDnsResolver(Options));
+ }
+ CachingResolverId = RegisterWithSameMailbox(CreateCachingDnsResolver(SimpleResolverId, Options));
+ }
+ return CachingResolverId;
+ }
+
+ void PassAway() override {
+ if (CachingResolverId) {
+ Send(CachingResolverId, new TEvents::TEvPoison);
+ CachingResolverId = { };
+ }
+ if (SimpleResolverId) {
+ Send(SimpleResolverId, new TEvents::TEvPoison);
+ SimpleResolverId = { };
+ }
+ }
+
+ private:
+ TOnDemandDnsResolverOptions Options;
+ TActorId SimpleResolverId;
+ TActorId CachingResolverId;
+ };
+
+ IActor* CreateOnDemandDnsResolver(TOnDemandDnsResolverOptions options) {
+ return new TOnDemandDnsResolver(std::move(options));
+ }
+
+} // namespace NDnsResolver
+} // namespace NActors
diff --git a/library/cpp/actors/dnsresolver/dnsresolver_ondemand_ut.cpp b/library/cpp/actors/dnsresolver/dnsresolver_ondemand_ut.cpp
new file mode 100644
index 0000000000..2758484552
--- /dev/null
+++ b/library/cpp/actors/dnsresolver/dnsresolver_ondemand_ut.cpp
@@ -0,0 +1,24 @@
+#include "dnsresolver.h"
+
+#include <library/cpp/actors/testlib/test_runtime.h>
+#include <library/cpp/testing/unittest/registar.h>
+
+using namespace NActors;
+using namespace NActors::NDnsResolver;
+
+Y_UNIT_TEST_SUITE(OnDemandDnsResolver) {
+
+ Y_UNIT_TEST(ResolveLocalHost) {
+ TTestActorRuntimeBase runtime;
+ runtime.Initialize();
+ auto sender = runtime.AllocateEdgeActor();
+ auto resolver = runtime.Register(CreateOnDemandDnsResolver());
+ runtime.Send(new IEventHandle(resolver, sender, new TEvDns::TEvGetHostByName("localhost", AF_UNSPEC)),
+ 0, true);
+ auto ev = runtime.GrabEdgeEventRethrow<TEvDns::TEvGetHostByNameResult>(sender);
+ UNIT_ASSERT_VALUES_EQUAL_C(ev->Get()->Status, 0, ev->Get()->ErrorText);
+ size_t addrs = ev->Get()->AddrsV4.size() + ev->Get()->AddrsV6.size();
+ UNIT_ASSERT_C(addrs > 0, "Got " << addrs << " addresses");
+ }
+
+}
diff --git a/library/cpp/actors/dnsresolver/dnsresolver_ut.cpp b/library/cpp/actors/dnsresolver/dnsresolver_ut.cpp
new file mode 100644
index 0000000000..0c343a805c
--- /dev/null
+++ b/library/cpp/actors/dnsresolver/dnsresolver_ut.cpp
@@ -0,0 +1,98 @@
+#include "dnsresolver.h"
+
+#include <library/cpp/actors/testlib/test_runtime.h>
+#include <library/cpp/testing/unittest/registar.h>
+#include <util/string/builder.h>
+
+#include <ares.h>
+
+using namespace NActors;
+using namespace NActors::NDnsResolver;
+
+Y_UNIT_TEST_SUITE(DnsResolver) {
+
+ struct TSilentUdpServer {
+ TInetDgramSocket Socket;
+ ui16 Port;
+
+ TSilentUdpServer() {
+ TSockAddrInet addr("127.0.0.1", 0);
+ int err = Socket.Bind(&addr);
+ Y_VERIFY(err == 0, "Cannot bind a udp socket");
+ Port = addr.GetPort();
+ }
+ };
+
+ Y_UNIT_TEST(ResolveLocalHost) {
+ TTestActorRuntimeBase runtime;
+ runtime.Initialize();
+ auto sender = runtime.AllocateEdgeActor();
+ auto resolver = runtime.Register(CreateSimpleDnsResolver());
+ runtime.Send(new IEventHandle(resolver, sender, new TEvDns::TEvGetHostByName("localhost", AF_UNSPEC)),
+ 0, true);
+ auto ev = runtime.GrabEdgeEventRethrow<TEvDns::TEvGetHostByNameResult>(sender);
+ UNIT_ASSERT_VALUES_EQUAL_C(ev->Get()->Status, 0, ev->Get()->ErrorText);
+ size_t addrs = ev->Get()->AddrsV4.size() + ev->Get()->AddrsV6.size();
+ UNIT_ASSERT_C(addrs > 0, "Got " << addrs << " addresses");
+ }
+
+ Y_UNIT_TEST(ResolveYandexRu) {
+ TTestActorRuntimeBase runtime;
+ runtime.Initialize();
+ auto sender = runtime.AllocateEdgeActor();
+ auto resolver = runtime.Register(CreateSimpleDnsResolver());
+ runtime.Send(new IEventHandle(resolver, sender, new TEvDns::TEvGetHostByName("yandex.ru", AF_UNSPEC)),
+ 0, true);
+ auto ev = runtime.GrabEdgeEventRethrow<TEvDns::TEvGetHostByNameResult>(sender);
+ UNIT_ASSERT_VALUES_EQUAL_C(ev->Get()->Status, 0, ev->Get()->ErrorText);
+ size_t addrs = ev->Get()->AddrsV4.size() + ev->Get()->AddrsV6.size();
+ UNIT_ASSERT_C(addrs > 0, "Got " << addrs << " addresses");
+ }
+
+ Y_UNIT_TEST(GetAddrYandexRu) {
+ TTestActorRuntimeBase runtime;
+ runtime.Initialize();
+ auto sender = runtime.AllocateEdgeActor();
+ auto resolver = runtime.Register(CreateSimpleDnsResolver());
+
+ runtime.Send(new IEventHandle(resolver, sender, new TEvDns::TEvGetAddr("yandex.ru", AF_UNSPEC)),
+ 0, true);
+ auto ev = runtime.GrabEdgeEventRethrow<TEvDns::TEvGetAddrResult>(sender);
+ UNIT_ASSERT_VALUES_EQUAL_C(ev->Get()->Status, 0, ev->Get()->ErrorText);
+ UNIT_ASSERT_C(ev->Get()->IsV4() || ev->Get()->IsV6(), "Expect v4 or v6 address");
+ }
+
+ Y_UNIT_TEST(ResolveTimeout) {
+ TSilentUdpServer server;
+ TTestActorRuntimeBase runtime;
+ runtime.Initialize();
+ auto sender = runtime.AllocateEdgeActor();
+ TSimpleDnsResolverOptions options;
+ options.Timeout = TDuration::MilliSeconds(250);
+ options.Attempts = 2;
+ options.Servers.emplace_back(TStringBuilder() << "127.0.0.1:" << server.Port);
+ auto resolver = runtime.Register(CreateSimpleDnsResolver(options));
+ runtime.Send(new IEventHandle(resolver, sender, new TEvDns::TEvGetHostByName("timeout.yandex.ru", AF_INET)),
+ 0, true);
+ auto ev = runtime.GrabEdgeEventRethrow<TEvDns::TEvGetHostByNameResult>(sender);
+ UNIT_ASSERT_VALUES_EQUAL_C(ev->Get()->Status, ARES_ETIMEOUT, ev->Get()->ErrorText);
+ }
+
+ Y_UNIT_TEST(ResolveGracefulStop) {
+ TSilentUdpServer server;
+ TTestActorRuntimeBase runtime;
+ runtime.Initialize();
+ auto sender = runtime.AllocateEdgeActor();
+ TSimpleDnsResolverOptions options;
+ options.Timeout = TDuration::Seconds(5);
+ options.Attempts = 5;
+ options.Servers.emplace_back(TStringBuilder() << "127.0.0.1:" << server.Port);
+ auto resolver = runtime.Register(CreateSimpleDnsResolver(options));
+ runtime.Send(new IEventHandle(resolver, sender, new TEvDns::TEvGetHostByName("timeout.yandex.ru", AF_INET)),
+ 0, true);
+ runtime.Send(new IEventHandle(resolver, sender, new TEvents::TEvPoison), 0, true);
+ auto ev = runtime.GrabEdgeEventRethrow<TEvDns::TEvGetHostByNameResult>(sender);
+ UNIT_ASSERT_VALUES_EQUAL_C(ev->Get()->Status, ARES_ECANCELLED, ev->Get()->ErrorText);
+ }
+
+}
diff --git a/library/cpp/actors/dnsresolver/ut/ya.make b/library/cpp/actors/dnsresolver/ut/ya.make
new file mode 100644
index 0000000000..ad936bdacd
--- /dev/null
+++ b/library/cpp/actors/dnsresolver/ut/ya.make
@@ -0,0 +1,20 @@
+UNITTEST_FOR(library/cpp/actors/dnsresolver)
+
+OWNER(g:kikimr)
+
+PEERDIR(
+ library/cpp/actors/testlib
+)
+
+SRCS(
+ dnsresolver_caching_ut.cpp
+ dnsresolver_ondemand_ut.cpp
+ dnsresolver_ut.cpp
+)
+
+ADDINCL(contrib/libs/c-ares)
+
+TAG(ya:external)
+REQUIREMENTS(network:full)
+
+END()
diff --git a/library/cpp/actors/dnsresolver/ya.make b/library/cpp/actors/dnsresolver/ya.make
new file mode 100644
index 0000000000..329c56c5b3
--- /dev/null
+++ b/library/cpp/actors/dnsresolver/ya.make
@@ -0,0 +1,20 @@
+LIBRARY()
+
+OWNER(g:kikimr)
+
+SRCS(
+ dnsresolver.cpp
+ dnsresolver_caching.cpp
+ dnsresolver_ondemand.cpp
+)
+
+PEERDIR(
+ library/cpp/actors/core
+ contrib/libs/c-ares
+)
+
+ADDINCL(contrib/libs/c-ares)
+
+END()
+
+RECURSE_FOR_TESTS(ut)
diff --git a/library/cpp/actors/helpers/activeactors.cpp b/library/cpp/actors/helpers/activeactors.cpp
new file mode 100644
index 0000000000..145e97dc57
--- /dev/null
+++ b/library/cpp/actors/helpers/activeactors.cpp
@@ -0,0 +1,2 @@
+#include "activeactors.h"
+
diff --git a/library/cpp/actors/helpers/activeactors.h b/library/cpp/actors/helpers/activeactors.h
new file mode 100644
index 0000000000..0fdb0fab10
--- /dev/null
+++ b/library/cpp/actors/helpers/activeactors.h
@@ -0,0 +1,42 @@
+#pragma once
+
+#include <library/cpp/actors/core/actor.h>
+#include <library/cpp/actors/core/events.h>
+#include <util/generic/hash_set.h>
+
+namespace NActors {
+
+ ////////////////////////////////////////////////////////////////////////////
+ // TActiveActors
+ // Keeps track of created actors so that they can all be killed with a single PoisonPill broadcast.
+ ////////////////////////////////////////////////////////////////////////////
+ class TActiveActors : public THashSet<TActorId> {
+ public:
+ void Insert(const TActorId &aid) {
+ bool inserted = insert(aid).second;
+ Y_VERIFY(inserted);
+ }
+
+ void Insert(const TActiveActors &moreActors) {
+ for (const auto &aid : moreActors) {
+ Insert(aid);
+ }
+ }
+
+ void Erase(const TActorId &aid) {
+ auto num = erase(aid);
+ Y_VERIFY(num == 1);
+ }
+
+ size_t KillAndClear(const TActorContext &ctx) {
+ size_t s = size(); // number of actors managed
+ for (const auto &x: *this) {
+ ctx.Send(x, new TEvents::TEvPoisonPill());
+ }
+ clear();
+ return s; // how many actors we killed
+ }
+ };
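+
+ // Minimal usage sketch (hypothetical owner actor; CreateWorker is illustrative):
+ //
+ // TActiveActors ActiveActors;
+ // ActiveActors.Insert(ctx.Register(CreateWorker())); // track a child
+ // ...
+ // // on TEvPoisonPill:
+ // ActiveActors.KillAndClear(ctx); // poison every tracked actor
+ // Die(ctx);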
+
+} // namespace NActors
+
diff --git a/library/cpp/actors/helpers/flow_controlled_queue.cpp b/library/cpp/actors/helpers/flow_controlled_queue.cpp
new file mode 100644
index 0000000000..d75cc54023
--- /dev/null
+++ b/library/cpp/actors/helpers/flow_controlled_queue.cpp
@@ -0,0 +1,215 @@
+#include "flow_controlled_queue.h"
+
+#include <library/cpp/actors/core/interconnect.h>
+#include <library/cpp/actors/core/hfunc.h>
+#include <library/cpp/actors/util/datetime.h>
+
+#include <util/generic/deque.h>
+#include <util/datetime/cputimer.h>
+#include <util/generic/algorithm.h>
+
+namespace NActors {
+
+class TFlowControlledRequestQueue;
+
+class TFlowControlledRequestActor : public IActor {
+ TFlowControlledRequestQueue * const QueueActor;
+
+ void HandleReply(TAutoPtr<IEventHandle> &ev);
+ void HandleUndelivered(TEvents::TEvUndelivered::TPtr &ev);
+public:
+ const TActorId Source;
+ const ui64 Cookie;
+ const ui32 Flags;
+ const ui64 StartCounter;
+
+ TFlowControlledRequestActor(ui32 activity, TFlowControlledRequestQueue *queue, TActorId source, ui64 cookie, ui32 flags)
+ : IActor(static_cast<TReceiveFunc>(&TFlowControlledRequestActor::StateWait), activity)
+ , QueueActor(queue)
+ , Source(source)
+ , Cookie(cookie)
+ , Flags(flags)
+ , StartCounter(GetCycleCountFast())
+ {}
+
+ STATEFN(StateWait) {
+ switch (ev->GetTypeRewrite()) {
+ hFunc(TEvents::TEvUndelivered, HandleUndelivered);
+ default:
+ HandleReply(ev);
+ }
+ }
+
+ TDuration AccumulatedLatency() const {
+ const ui64 cc = GetCycleCountFast() - StartCounter;
+ return CyclesToDuration(cc);
+ }
+
+ using IActor::PassAway;
+};
+
+class TFlowControlledRequestQueue : public IActor {
+ const TActorId Target;
+ const TFlowControlledQueueConfig Config;
+
+ TDeque<THolder<IEventHandle>> UnhandledRequests;
+ TDeque<TFlowControlledRequestActor *> RegisteredRequests;
+
+ bool Subscribed = false;
+
+ TDuration MinimalSeenLatency;
+
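+ // Admission policy: always admit up to MinAllowedInFly requests, never
+ // admit more than MaxAllowedInFly, and in between admit only while the
+ // oldest in-fly request's latency stays low (below MinTrackedLatency, or
+ // within LatencyFactor of the best latency seen so far). TargetDynamicRate,
+ // when set, additionally caps in-fly at rate * MinimalSeenLatency.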
+ bool CanRegister() {
+ const ui64 inFly = RegisteredRequests.size();
+ if (inFly <= Config.MinAllowedInFly) // <= for handling minAllowed == 0
+ return true;
+
+ if (inFly >= Config.MaxAllowedInFly)
+ return false;
+
+ if (Config.TargetDynamicRate) {
+ if (const ui64 dynMax = MinimalSeenLatency.MicroSeconds() * Config.TargetDynamicRate / 1000000) {
+ if (inFly >= dynMax)
+ return false;
+ }
+ }
+
+ const TDuration currentLatency = RegisteredRequests.front()->AccumulatedLatency();
+ if (currentLatency <= Config.MinTrackedLatency)
+ return true;
+
+ if (currentLatency <= MinimalSeenLatency * Config.LatencyFactor)
+ return true;
+
+ return false;
+ }
+
+ void HandleForwardedEvent(TAutoPtr<IEventHandle> &ev) {
+ if (CanRegister()) {
+ RegisterReqActor(ev);
+ } else {
+ UnhandledRequests.emplace_back(ev.Release());
+ }
+ }
+
+ void RegisterReqActor(THolder<IEventHandle> ev) {
+ TFlowControlledRequestActor *reqActor = new TFlowControlledRequestActor(ActivityType, this, ev->Sender, ev->Cookie, ev->Flags);
+ const TActorId reqActorId = RegisterWithSameMailbox(reqActor);
+ RegisteredRequests.emplace_back(reqActor);
+
+ if (!Subscribed && (Target.NodeId() != SelfId().NodeId())) {
+ Send(TActivationContext::InterconnectProxy(Target.NodeId()), new TEvents::TEvSubscribe(), IEventHandle::FlagTrackDelivery);
+ Subscribed = true;
+ }
+
+ TActivationContext::Send(new IEventHandle(Target, reqActorId, ev->ReleaseBase().Release(), IEventHandle::FlagTrackDelivery, ev->Cookie));
+ }
+
+ void PumpQueue() {
+ while (RegisteredRequests && RegisteredRequests.front() == nullptr)
+ RegisteredRequests.pop_front();
+
+ while (UnhandledRequests && CanRegister()) {
+ RegisterReqActor(std::move(UnhandledRequests.front()));
+ UnhandledRequests.pop_front();
+ }
+ }
+
+ void HandleDisconnected() {
+ Subscribed = false;
+
+ const ui32 nodeid = Target.NodeId();
+ for (TFlowControlledRequestActor *reqActor : RegisteredRequests) {
+ if (reqActor) {
+ if (reqActor->Flags & IEventHandle::FlagSubscribeOnSession) {
+ TActivationContext::Send(
+ new IEventHandle(reqActor->Source, TActorId(), new TEvInterconnect::TEvNodeDisconnected(nodeid), 0, reqActor->Cookie)
+ );
+ }
+ reqActor->PassAway();
+ }
+ }
+
+ RegisteredRequests.clear();
+
+ for (auto &ev : UnhandledRequests) {
+ const auto reason = TEvents::TEvUndelivered::Disconnected;
+ if (ev->Flags & IEventHandle::FlagTrackDelivery) {
+ TActivationContext::Send(
+ new IEventHandle(ev->Sender, ev->Recipient, new TEvents::TEvUndelivered(ev->GetTypeRewrite(), reason), 0, ev->Cookie)
+ );
+ }
+ }
+
+ UnhandledRequests.clear();
+ }
+
+ void HandlePoison() {
+ HandleDisconnected();
+
+ if (SelfId().NodeId() != Target.NodeId())
+ Send(TActivationContext::InterconnectProxy(Target.NodeId()), new TEvents::TEvUnsubscribe());
+
+ PassAway();
+ }
+public:
+ TFlowControlledRequestQueue(TActorId target, ui32 activity, const TFlowControlledQueueConfig &config)
+ : IActor(static_cast<TReceiveFunc>(&TFlowControlledRequestQueue::StateWork), activity)
+ , Target(target)
+ , Config(config)
+ , MinimalSeenLatency(TDuration::Seconds(1))
+ {}
+
+ STATEFN(StateWork) {
+ switch (ev->GetTypeRewrite()) {
+ cFunc(TEvInterconnect::TEvNodeDisconnected::EventType, HandleDisconnected);
+ IgnoreFunc(TEvInterconnect::TEvNodeConnected);
+ cFunc(TEvents::TEvUndelivered::EventType, HandleDisconnected);
+ cFunc(TEvents::TEvPoison::EventType, HandlePoison);
+ default:
+ HandleForwardedEvent(ev);
+ }
+ }
+
+ void HandleRequestReply(TAutoPtr<IEventHandle> &ev, TFlowControlledRequestActor *reqActor) {
+ auto it = Find(RegisteredRequests, reqActor);
+ if (it == RegisteredRequests.end())
+ return;
+
+ TActivationContext::Send(ev->Forward(reqActor->Source));
+ const TDuration reqLatency = reqActor->AccumulatedLatency();
+ if (reqLatency < MinimalSeenLatency)
+ MinimalSeenLatency = reqLatency;
+
+ *it = nullptr;
+ PumpQueue();
+ }
+
+ void HandleRequestUndelivered(TEvents::TEvUndelivered::TPtr &ev, TFlowControlledRequestActor *reqActor) {
+ auto it = Find(RegisteredRequests, reqActor);
+ if (it == RegisteredRequests.end())
+ return;
+
+ TActivationContext::Send(ev->Forward(reqActor->Source));
+
+ *it = nullptr;
+ PumpQueue();
+ }
+};
+
+void TFlowControlledRequestActor::HandleReply(TAutoPtr<IEventHandle> &ev) {
+ QueueActor->HandleRequestReply(ev, this);
+ PassAway();
+}
+
+void TFlowControlledRequestActor::HandleUndelivered(TEvents::TEvUndelivered::TPtr &ev) {
+ QueueActor->HandleRequestUndelivered(ev, this);
+ PassAway();
+}
+
+
+IActor* CreateFlowControlledRequestQueue(TActorId targetId, ui32 activity, const TFlowControlledQueueConfig &config) {
+ return new TFlowControlledRequestQueue(targetId, activity, config);
+}
+
+}
diff --git a/library/cpp/actors/helpers/flow_controlled_queue.h b/library/cpp/actors/helpers/flow_controlled_queue.h
new file mode 100644
index 0000000000..d250405304
--- /dev/null
+++ b/library/cpp/actors/helpers/flow_controlled_queue.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#include <library/cpp/actors/core/actor.h>
+
+namespace NActors {
+
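+ // Configuration for a flow-controlled request queue: MinAllowedInFly is
+ // always admitted, MaxAllowedInFly is a hard cap, and between the two,
+ // admission is driven by observed request latency and, if non-zero, by
+ // TargetDynamicRate (requests per second).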
+ struct TFlowControlledQueueConfig {
+ ui32 MinAllowedInFly = 20;
+ ui32 MaxAllowedInFly = 100;
+ ui32 TargetDynamicRate = 0;
+
+ TDuration MinTrackedLatency = TDuration::MilliSeconds(20);
+ ui32 LatencyFactor = 4;
+ };
+
+ IActor* CreateFlowControlledRequestQueue(TActorId targetId, ui32 activity = IActor::ACTORLIB_COMMON, const TFlowControlledQueueConfig &config = TFlowControlledQueueConfig());
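+
+ // Typical wiring (sketch; TEvLoadRequest and targetId are illustrative):
+ //
+ // TActorId queue = Register(CreateFlowControlledRequestQueue(targetId));
+ // Send(queue, new TEvLoadRequest(...)); // the reply is forwarded back to us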
+
+}
diff --git a/library/cpp/actors/helpers/future_callback.h b/library/cpp/actors/helpers/future_callback.h
new file mode 100644
index 0000000000..8ca0d99fda
--- /dev/null
+++ b/library/cpp/actors/helpers/future_callback.h
@@ -0,0 +1,33 @@
+#pragma once
+
+#include <library/cpp/actors/core/actor.h>
+#include <library/cpp/actors/core/hfunc.h>
+
+namespace NActors {
+
+template <typename EventType>
+struct TActorFutureCallback : TActor<TActorFutureCallback<EventType>> {
+ using TCallback = std::function<void(TAutoPtr<TEventHandle<EventType>>&)>;
+ using TBase = TActor<TActorFutureCallback<EventType>>;
+ TCallback Callback;
+
+ static constexpr IActor::EActivityType ActorActivityType() {
+ return IActor::ACTOR_FUTURE_CALLBACK;
+ }
+
+ TActorFutureCallback(TCallback&& callback)
+ : TBase(&TActorFutureCallback::StateWaitForEvent)
+ , Callback(std::move(callback))
+ {}
+
+ STRICT_STFUNC(StateWaitForEvent,
+ HFunc(EventType, Handle)
+ )
+
+ void Handle(typename EventType::TPtr ev, const TActorContext& ctx) {
+ Callback(ev);
+ TBase::Die(ctx);
+ }
+};
+
+} // NActors
diff --git a/library/cpp/actors/helpers/mon_histogram_helper.h b/library/cpp/actors/helpers/mon_histogram_helper.h
new file mode 100644
index 0000000000..a9a57e3823
--- /dev/null
+++ b/library/cpp/actors/helpers/mon_histogram_helper.h
@@ -0,0 +1,86 @@
+#pragma once
+
+#include <library/cpp/monlib/dynamic_counters/counters.h>
+
+#include <util/string/cast.h>
+
+namespace NActors {
+ namespace NMon {
+ class THistogramCounterHelper {
+ public:
+ THistogramCounterHelper()
+ : FirstBucketVal(0)
+ , BucketCount(0)
+ {
+ }
+
+ THistogramCounterHelper(const THistogramCounterHelper&) = default;
+
+ void Init(NMonitoring::TDynamicCounters* group, const TString& baseName, const TString& unit,
+ ui64 firstBucket, ui64 bucketCnt, bool useSensorLabelName = true)
+ {
+ Y_ASSERT(FirstBucketVal == 0);
+ Y_ASSERT(BucketCount == 0);
+
+ FirstBucketVal = firstBucket;
+ BucketCount = bucketCnt;
+ BucketsHolder.reserve(BucketCount);
+ Buckets.reserve(BucketCount);
+ for (size_t i = 0; i < BucketCount; ++i) {
+ TString bucketName = GetBucketName(i) + " " + unit;
+ auto labelName = useSensorLabelName ? "sensor" : "name";
+ BucketsHolder.push_back(group->GetSubgroup(labelName, baseName)->GetNamedCounter("range", bucketName, true));
+ Buckets.push_back(BucketsHolder.back().Get());
+ }
+ }
+
+ void Add(ui64 val) {
+ Y_ASSERT(FirstBucketVal != 0);
+ Y_ASSERT(BucketCount != 0);
+ Y_VERIFY(val <= (1ULL << 63ULL));
+ size_t ind = 0;
+ if (val > FirstBucketVal) {
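+ // Index of the smallest power-of-two bucket that fits val:
+ // (2 * val - 1) / FirstBucketVal rounds val up to the next power of
+ // two relative to the first bucket, GetValueBitCount takes its log2.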
+ ind = GetValueBitCount((2 * val - 1) / FirstBucketVal) - 1;
+ if (ind >= BucketCount) {
+ ind = BucketCount - 1;
+ }
+ }
+ Buckets[ind]->Inc();
+ }
+
+ ui64 GetBucketCount() const {
+ return BucketCount;
+ }
+
+ ui64 GetBucketValue(size_t index) const {
+ Y_ASSERT(index < BucketCount);
+ return Buckets[index]->Val();
+ }
+
+ void SetBucketValue(ui64 index, ui64 value) {
+ Y_ASSERT(index < BucketCount);
+ *Buckets[index] = value;
+ }
+
+ private:
+ TString GetBucketName(size_t ind) const {
+ Y_ASSERT(FirstBucketVal != 0);
+ Y_ASSERT(BucketCount != 0);
+ Y_ASSERT(ind < BucketCount);
+ if (ind + 1 < BucketCount) {
+ return ToString<ui64>(FirstBucketVal << ind);
+ } else {
+ // Last slot is up to +INF
+ return "INF";
+ }
+ }
+
+ private:
+ ui64 FirstBucketVal;
+ ui64 BucketCount;
+ TVector<NMonitoring::TDynamicCounters::TCounterPtr> BucketsHolder;
+ TVector<NMonitoring::TDeprecatedCounter*> Buckets;
+ };
+
+ }
+}
diff --git a/library/cpp/actors/helpers/pool_stats_collector.h b/library/cpp/actors/helpers/pool_stats_collector.h
new file mode 100644
index 0000000000..61d0b45780
--- /dev/null
+++ b/library/cpp/actors/helpers/pool_stats_collector.h
@@ -0,0 +1,314 @@
+#pragma once
+
+#include <library/cpp/actors/core/actor_bootstrapped.h>
+#include <library/cpp/actors/core/actorsystem.h>
+#include <library/cpp/actors/core/executor_thread.h>
+#include <library/cpp/actors/core/hfunc.h>
+#include <library/cpp/monlib/dynamic_counters/counters.h>
+
+#include <util/generic/vector.h>
+#include <util/generic/xrange.h>
+#include <util/string/printf.h>
+
+namespace NActors {
+
+// Periodically collects stats from executor threads and exposes them as mon counters
+class TStatsCollectingActor : public TActorBootstrapped<TStatsCollectingActor> {
+private:
+ struct THistogramCounters {
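+ // Mirrors a TLogHistogram (power-of-two buckets) into legacy "range"
+ // counters; overflow past the last bucket is folded into the INF slot.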
+ void Init(NMonitoring::TDynamicCounters* group, const TString& baseName, const TString& unit, ui64 maxVal) {
+ for (size_t i = 0; (1ull<<i) <= maxVal; ++i) {
+ TString bucketName = ToString(1ull<<i) + " " + unit;
+ Buckets.push_back(group->GetSubgroup("sensor", baseName)->GetNamedCounter("range", bucketName, true));
+ }
+ Buckets.push_back(group->GetSubgroup("sensor", baseName)->GetNamedCounter("range", "INF", true));
+ }
+
+ void Set(const TLogHistogram& data) {
+ ui32 i = 0;
+ for (;i < Y_ARRAY_SIZE(data.Buckets) && i < Buckets.size()-1; ++i)
+ *Buckets[i] = data.Buckets[i];
+ ui64 last = 0;
+ for (;i < Y_ARRAY_SIZE(data.Buckets); ++i)
+ last += data.Buckets[i];
+ *Buckets.back() = last;
+ }
+
+ void Set(const TLogHistogram& data, double factor) {
+ ui32 i = 0;
+ for (;i < Y_ARRAY_SIZE(data.Buckets) && i < Buckets.size()-1; ++i)
+ *Buckets[i] = data.Buckets[i]*factor;
+ ui64 last = 0;
+ for (;i < Y_ARRAY_SIZE(data.Buckets); ++i)
+ last += data.Buckets[i];
+ *Buckets.back() = last*factor;
+ }
+
+ private:
+ TVector<NMonitoring::TDynamicCounters::TCounterPtr> Buckets;
+ };
+
+ struct TActivityStats {
+ void Init(NMonitoring::TDynamicCounterPtr group) {
+ Group = group;
+
+ ElapsedMicrosecByActivityBuckets.resize(GetActivityTypeCount());
+ ReceivedEventsByActivityBuckets.resize(GetActivityTypeCount());
+ ActorsAliveByActivityBuckets.resize(GetActivityTypeCount());
+ ScheduledEventsByActivityBuckets.resize(GetActivityTypeCount());
+ }
+
+ void Set(const TExecutorThreadStats& stats) {
+ for (ui32 i : xrange(stats.MaxActivityType())) {
+ Y_VERIFY(i < GetActivityTypeCount());
+ ui64 ticks = stats.ElapsedTicksByActivity[i];
+ ui64 events = stats.ReceivedEventsByActivity[i];
+ ui64 actors = stats.ActorsAliveByActivity[i];
+ ui64 scheduled = stats.ScheduledEventsByActivity[i];
+
+ if (!ActorsAliveByActivityBuckets[i]) {
+ if (ticks || events || actors || scheduled) {
+ InitCountersForActivity(i);
+ } else {
+ continue;
+ }
+ }
+
+ *ElapsedMicrosecByActivityBuckets[i] = ::NHPTimer::GetSeconds(ticks)*1000000;
+ *ReceivedEventsByActivityBuckets[i] = events;
+ *ActorsAliveByActivityBuckets[i] = actors;
+ *ScheduledEventsByActivityBuckets[i] = scheduled;
+ }
+ }
+
+ private:
+ void InitCountersForActivity(ui32 activityType) {
+ Y_VERIFY(activityType < GetActivityTypeCount());
+
+ auto bucketName = TString(GetActivityTypeName(activityType));
+
+ ElapsedMicrosecByActivityBuckets[activityType] =
+ Group->GetSubgroup("sensor", "ElapsedMicrosecByActivity")->GetNamedCounter("activity", bucketName, true);
+ ReceivedEventsByActivityBuckets[activityType] =
+ Group->GetSubgroup("sensor", "ReceivedEventsByActivity")->GetNamedCounter("activity", bucketName, true);
+ ActorsAliveByActivityBuckets[activityType] =
+ Group->GetSubgroup("sensor", "ActorsAliveByActivity")->GetNamedCounter("activity", bucketName, false);
+ ScheduledEventsByActivityBuckets[activityType] =
+ Group->GetSubgroup("sensor", "ScheduledEventsByActivity")->GetNamedCounter("activity", bucketName, true);
+ }
+
+ private:
+ NMonitoring::TDynamicCounterPtr Group;
+
+ TVector<NMonitoring::TDynamicCounters::TCounterPtr> ElapsedMicrosecByActivityBuckets;
+ TVector<NMonitoring::TDynamicCounters::TCounterPtr> ReceivedEventsByActivityBuckets;
+ TVector<NMonitoring::TDynamicCounters::TCounterPtr> ActorsAliveByActivityBuckets;
+ TVector<NMonitoring::TDynamicCounters::TCounterPtr> ScheduledEventsByActivityBuckets;
+ };
+
+ struct TExecutorPoolCounters {
+ TIntrusivePtr<NMonitoring::TDynamicCounters> PoolGroup;
+
+ NMonitoring::TDynamicCounters::TCounterPtr SentEvents;
+ NMonitoring::TDynamicCounters::TCounterPtr ReceivedEvents;
+ NMonitoring::TDynamicCounters::TCounterPtr PreemptedEvents;
+ NMonitoring::TDynamicCounters::TCounterPtr NonDeliveredEvents;
+ NMonitoring::TDynamicCounters::TCounterPtr DestroyedActors;
+ NMonitoring::TDynamicCounters::TCounterPtr EmptyMailboxActivation;
+ NMonitoring::TDynamicCounters::TCounterPtr CpuMicrosec;
+ NMonitoring::TDynamicCounters::TCounterPtr ElapsedMicrosec;
+ NMonitoring::TDynamicCounters::TCounterPtr ParkedMicrosec;
+ NMonitoring::TDynamicCounters::TCounterPtr ActorRegistrations;
+ NMonitoring::TDynamicCounters::TCounterPtr ActorsAlive;
+ NMonitoring::TDynamicCounters::TCounterPtr AllocatedMailboxes;
+ NMonitoring::TDynamicCounters::TCounterPtr MailboxPushedOutBySoftPreemption;
+ NMonitoring::TDynamicCounters::TCounterPtr MailboxPushedOutByTime;
+ NMonitoring::TDynamicCounters::TCounterPtr MailboxPushedOutByEventCount;
+
+ THistogramCounters LegacyActivationTimeHistogram;
+ NMonitoring::THistogramPtr ActivationTimeHistogram;
+ THistogramCounters LegacyEventDeliveryTimeHistogram;
+ NMonitoring::THistogramPtr EventDeliveryTimeHistogram;
+ THistogramCounters LegacyEventProcessingCountHistogram;
+ NMonitoring::THistogramPtr EventProcessingCountHistogram;
+ THistogramCounters LegacyEventProcessingTimeHistogram;
+ NMonitoring::THistogramPtr EventProcessingTimeHistogram;
+
+ TActivityStats ActivityStats;
+ NMonitoring::TDynamicCounters::TCounterPtr MaxUtilizationTime;
+
+ double Usage = 0;
+ double LastElapsedSeconds = 0;
+ THPTimer UsageTimer;
+ TString Name;
+ ui32 Threads;
+
+ void Init(NMonitoring::TDynamicCounters* group, const TString& poolName, ui32 threads) {
+ LastElapsedSeconds = 0;
+ Usage = 0;
+ UsageTimer.Reset();
+ Name = poolName;
+ Threads = threads;
+
+ PoolGroup = group->GetSubgroup("execpool", poolName);
+
+ SentEvents = PoolGroup->GetCounter("SentEvents", true);
+ ReceivedEvents = PoolGroup->GetCounter("ReceivedEvents", true);
+ PreemptedEvents = PoolGroup->GetCounter("PreemptedEvents", true);
+ NonDeliveredEvents = PoolGroup->GetCounter("NonDeliveredEvents", true);
+ DestroyedActors = PoolGroup->GetCounter("DestroyedActors", true);
+ CpuMicrosec = PoolGroup->GetCounter("CpuMicrosec", true);
+ ElapsedMicrosec = PoolGroup->GetCounter("ElapsedMicrosec", true);
+ ParkedMicrosec = PoolGroup->GetCounter("ParkedMicrosec", true);
+ EmptyMailboxActivation = PoolGroup->GetCounter("EmptyMailboxActivation", true);
+ ActorRegistrations = PoolGroup->GetCounter("ActorRegistrations", true);
+ ActorsAlive = PoolGroup->GetCounter("ActorsAlive", false);
+ AllocatedMailboxes = PoolGroup->GetCounter("AllocatedMailboxes", false);
+ MailboxPushedOutBySoftPreemption = PoolGroup->GetCounter("MailboxPushedOutBySoftPreemption", true);
+ MailboxPushedOutByTime = PoolGroup->GetCounter("MailboxPushedOutByTime", true);
+ MailboxPushedOutByEventCount = PoolGroup->GetCounter("MailboxPushedOutByEventCount", true);
+
+ LegacyActivationTimeHistogram.Init(PoolGroup.Get(), "ActivationTime", "usec", 5*1000*1000);
+ ActivationTimeHistogram = PoolGroup->GetHistogram(
+ "ActivationTimeUs", NMonitoring::ExponentialHistogram(24, 2, 1));
+ LegacyEventDeliveryTimeHistogram.Init(PoolGroup.Get(), "EventDeliveryTime", "usec", 5*1000*1000);
+ EventDeliveryTimeHistogram = PoolGroup->GetHistogram(
+ "EventDeliveryTimeUs", NMonitoring::ExponentialHistogram(24, 2, 1));
+ LegacyEventProcessingCountHistogram.Init(PoolGroup.Get(), "EventProcessingCount", "usec", 5*1000*1000);
+ EventProcessingCountHistogram = PoolGroup->GetHistogram(
+ "EventProcessingCountUs", NMonitoring::ExponentialHistogram(24, 2, 1));
+ LegacyEventProcessingTimeHistogram.Init(PoolGroup.Get(), "EventProcessingTime", "usec", 5*1000*1000);
+ EventProcessingTimeHistogram = PoolGroup->GetHistogram(
+ "EventProcessingTimeUs", NMonitoring::ExponentialHistogram(24, 2, 1));
+
+ ActivityStats.Init(PoolGroup.Get());
+
+ MaxUtilizationTime = PoolGroup->GetCounter("MaxUtilizationTime", true);
+ }
+
+ void Set(const TExecutorPoolStats& poolStats, const TExecutorThreadStats& stats, ui32 numThreads) {
+#ifdef ACTORSLIB_COLLECT_EXEC_STATS
+ *SentEvents = stats.SentEvents;
+ *ReceivedEvents = stats.ReceivedEvents;
+ *PreemptedEvents = stats.PreemptedEvents;
+ *NonDeliveredEvents = stats.NonDeliveredEvents;
+ *DestroyedActors = stats.PoolDestroyedActors;
+ *EmptyMailboxActivation = stats.EmptyMailboxActivation;
+ *CpuMicrosec = stats.CpuNs / 1000;
+ *ElapsedMicrosec = ::NHPTimer::GetSeconds(stats.ElapsedTicks)*1000000;
+ *ParkedMicrosec = ::NHPTimer::GetSeconds(stats.ParkedTicks)*1000000;
+ *ActorRegistrations = stats.PoolActorRegistrations;
+ *ActorsAlive = stats.PoolActorRegistrations - stats.PoolDestroyedActors;
+ *AllocatedMailboxes = stats.PoolAllocatedMailboxes;
+ *MailboxPushedOutBySoftPreemption = stats.MailboxPushedOutBySoftPreemption;
+ *MailboxPushedOutByTime = stats.MailboxPushedOutByTime;
+ *MailboxPushedOutByEventCount = stats.MailboxPushedOutByEventCount;
+
+ LegacyActivationTimeHistogram.Set(stats.ActivationTimeHistogram);
+ ActivationTimeHistogram->Reset();
+ ActivationTimeHistogram->Collect(stats.ActivationTimeHistogram);
+
+ LegacyEventDeliveryTimeHistogram.Set(stats.EventDeliveryTimeHistogram);
+ EventDeliveryTimeHistogram->Reset();
+ EventDeliveryTimeHistogram->Collect(stats.EventDeliveryTimeHistogram);
+
+ LegacyEventProcessingCountHistogram.Set(stats.EventProcessingCountHistogram);
+ EventProcessingCountHistogram->Reset();
+ EventProcessingCountHistogram->Collect(stats.EventProcessingCountHistogram);
+
+ double toMicrosec = 1000000 / NHPTimer::GetClockRate();
+ LegacyEventProcessingTimeHistogram.Set(stats.EventProcessingTimeHistogram, toMicrosec);
+ EventProcessingTimeHistogram->Reset();
+ for (ui32 i = 0; i < stats.EventProcessingTimeHistogram.Count(); ++i) {
+ EventProcessingTimeHistogram->Collect(
+ stats.EventProcessingTimeHistogram.UpperBound(i),
+ stats.EventProcessingTimeHistogram.Value(i) * toMicrosec);
+ }
+
+ ActivityStats.Set(stats);
+
+ *MaxUtilizationTime = poolStats.MaxUtilizationTime;
+
+ double seconds = UsageTimer.PassedReset();
+
+ // TODO[serxa]: It doesn't account for contention. Use 1 - parkedTicksDelta / seconds / numThreads KIKIMR-11916
+ const double elapsed = NHPTimer::GetSeconds(stats.ElapsedTicks);
+ const double currentUsage = numThreads > 0 ? ((elapsed - LastElapsedSeconds) / seconds / numThreads) : 0;
+ LastElapsedSeconds = elapsed;
+
+ // update usage factor according to smoothness
+ const double smoothness = 0.5;
+ Usage = currentUsage * smoothness + Usage * (1.0 - smoothness);
+#else
+ Y_UNUSED(poolStats);
+ Y_UNUSED(stats);
+ Y_UNUSED(numThreads);
+#endif
+ }
+ };
+
+public:
+ static constexpr IActor::EActivityType ActorActivityType() {
+ return IActor::ACTORLIB_STATS;
+ }
+
+ TStatsCollectingActor(
+ ui32 intervalSec,
+ const TActorSystemSetup& setup,
+ NMonitoring::TDynamicCounterPtr counters)
+ : IntervalSec(intervalSec)
+ , Counters(counters)
+ {
+ PoolCounters.resize(setup.GetExecutorsCount());
+ for (size_t poolId = 0; poolId < PoolCounters.size(); ++poolId) {
+ PoolCounters[poolId].Init(Counters.Get(), setup.GetPoolName(poolId), setup.GetThreads(poolId));
+ }
+ }
+
+ void Bootstrap(const TActorContext& ctx) {
+ ctx.Schedule(TDuration::Seconds(IntervalSec), new TEvents::TEvWakeup());
+ Become(&TThis::StateWork);
+ }
+
+ STFUNC(StateWork) {
+ switch (ev->GetTypeRewrite()) {
+ CFunc(TEvents::TSystem::Wakeup, Wakeup);
+ }
+ }
+
+private:
+ virtual void OnWakeup(const TActorContext &ctx) {
+ Y_UNUSED(ctx);
+ }
+
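+ // Collects stats from every pool, publishes the aggregated counters and
+ // re-arms the periodic wakeup.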
+ void Wakeup(const TActorContext &ctx) {
+ for (size_t poolId = 0; poolId < PoolCounters.size(); ++poolId) {
+ TVector<TExecutorThreadStats> stats;
+ TExecutorPoolStats poolStats;
+ ctx.ExecutorThread.ActorSystem->GetPoolStats(poolId, poolStats, stats);
+ SetAggregatedCounters(PoolCounters[poolId], poolStats, stats);
+ }
+
+ OnWakeup(ctx);
+
+ ctx.Schedule(TDuration::Seconds(IntervalSec), new TEvents::TEvWakeup());
+ }
+
+ void SetAggregatedCounters(TExecutorPoolCounters& poolCounters, TExecutorPoolStats& poolStats, TVector<TExecutorThreadStats>& stats) {
+ // Sum all per-thread counters into the 0th element
+ for (ui32 idx = 1; idx < stats.size(); ++idx) {
+ stats[0].Aggregate(stats[idx]);
+ }
+ if (!stats.empty()) {
+ poolCounters.Set(poolStats, stats[0], stats.size() - 1);
+ }
+ }
+
+protected:
+ const ui32 IntervalSec;
+ NMonitoring::TDynamicCounterPtr Counters;
+
+ TVector<TExecutorPoolCounters> PoolCounters;
+};
+
+} // NActors
diff --git a/library/cpp/actors/helpers/selfping_actor.cpp b/library/cpp/actors/helpers/selfping_actor.cpp
new file mode 100644
index 0000000000..f9bfaf8dc0
--- /dev/null
+++ b/library/cpp/actors/helpers/selfping_actor.cpp
@@ -0,0 +1,183 @@
+#include "selfping_actor.h"
+
+#include <library/cpp/actors/core/actor_bootstrapped.h>
+#include <library/cpp/actors/core/hfunc.h>
+
+#include <library/cpp/containers/stack_vector/stack_vec.h>
+#include <library/cpp/sliding_window/sliding_window.h>
+
+namespace NActors {
+
+namespace {
+
+struct TEvPing: public TEventLocal<TEvPing, TEvents::THelloWorld::Ping> {
+ TEvPing(double timeStart)
+ : TimeStart(timeStart)
+ {}
+
+ const double TimeStart;
+};
+
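+// Sliding-window operation accumulating (count, sum) pairs, so the window
+// value can be reduced to an average; used with TSlidingWindow below.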
+template <class TValueType_>
+struct TAvgOperation {
+ struct TValueType {
+ ui64 Count = 0;
+ TValueType_ Sum = TValueType_();
+ };
+ using TValueVector = TVector<TValueType>;
+
+ static constexpr TValueType InitialValue() {
+ return TValueType(); // zero
+ }
+
+ // Updates value in current bucket and returns window value
+ static TValueType UpdateBucket(TValueType windowValue, TValueVector& buckets, size_t index, TValueType newVal) {
+ Y_ASSERT(index < buckets.size());
+ buckets[index].Sum += newVal.Sum;
+ buckets[index].Count += newVal.Count;
+ windowValue.Sum += newVal.Sum;
+ windowValue.Count += newVal.Count;
+ return windowValue;
+ }
+
+ static TValueType ClearBuckets(TValueType windowValue, TValueVector& buckets, size_t firstElemIndex, size_t bucketsToClear) {
+ Y_ASSERT(!buckets.empty());
+ Y_ASSERT(firstElemIndex < buckets.size());
+ Y_ASSERT(bucketsToClear <= buckets.size());
+
+ const size_t arraySize = buckets.size();
+ for (size_t i = 0; i < bucketsToClear; ++i) {
+ TValueType& curVal = buckets[firstElemIndex];
+ windowValue.Sum -= curVal.Sum;
+ windowValue.Count -= curVal.Count;
+ curVal = InitialValue();
+ firstElemIndex = (firstElemIndex + 1) % arraySize;
+ }
+ return windowValue;
+ }
+
+};
+
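+// An actor that periodically pings itself to measure both the delay between
+// scheduling and delivery of an event (actor-system responsiveness) and the
+// duration of a small fixed CPU task. The maximum delay and the average task
+// duration over 15-second sliding windows are published to the given counters.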
+class TSelfPingActor : public TActorBootstrapped<TSelfPingActor> {
+private:
+ const TDuration SendInterval;
+ const NMonitoring::TDynamicCounters::TCounterPtr Counter;
+ const NMonitoring::TDynamicCounters::TCounterPtr CalculationTimeCounter;
+
+ NSlidingWindow::TSlidingWindow<NSlidingWindow::TMaxOperation<ui64>> SlidingWindow;
+ NSlidingWindow::TSlidingWindow<TAvgOperation<ui64>> CalculationSlidingWindow;
+
+ THPTimer Timer;
+
+public:
+ static constexpr auto ActorActivityType() {
+ return SELF_PING_ACTOR;
+ }
+
+ TSelfPingActor(TDuration sendInterval, const NMonitoring::TDynamicCounters::TCounterPtr& counter,
+ const NMonitoring::TDynamicCounters::TCounterPtr& calculationTimeCounter)
+ : SendInterval(sendInterval)
+ , Counter(counter)
+ , CalculationTimeCounter(calculationTimeCounter)
+ , SlidingWindow(TDuration::Seconds(15), 100)
+ , CalculationSlidingWindow(TDuration::Seconds(15), 100)
+ {
+ }
+
+ void Bootstrap(const TActorContext& ctx)
+ {
+ Become(&TSelfPingActor::RunningState);
+ SchedulePing(ctx, Timer.Passed());
+ }
+
+ STFUNC(RunningState)
+ {
+ switch (ev->GetTypeRewrite()) {
+ HFunc(TEvPing, HandlePing);
+ default:
+ Y_FAIL("TSelfPingActor::RunningState: unexpected event 0x%08" PRIx32, ev->GetTypeRewrite());
+ }
+ }
+
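+ // Measures the wall time of a fixed small matrix multiplication that fits
+ // into L1 cache; the result in nanoseconds reflects current CPU slowdown.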
+ ui64 MeasureTaskDurationNs() {
+ // Prepare the test data: three 11x11 matrices of ui64
+ // 11 * 11 * 3 * 8 = 2904 bytes, fits in L1 cache
+ constexpr ui64 Size = 11;
+ // Align the data to reduce random alignment effects
+ alignas(64) TStackVec<ui64, Size * Size * 3> data;
+ ui64 s = 0;
+ NHPTimer::STime beginTime;
+ NHPTimer::STime endTime;
+ // Prepare the data
+ data.resize(Size * Size * 3);
+ for (ui64 matrixIdx = 0; matrixIdx < 3; ++matrixIdx) {
+ for (ui64 y = 0; y < Size; ++y) {
+ for (ui64 x = 0; x < Size; ++x) {
+ data[matrixIdx * (Size * Size) + y * Size + x] = y * Size + x;
+ }
+ }
+ }
+ // Warm-up the cache
+ NHPTimer::GetTime(&beginTime);
+ for (ui64 idx = 0; idx < data.size(); ++idx) {
+ s += data[idx];
+ }
+ NHPTimer::GetTime(&endTime);
+ s += (ui64)(1000000.0 * NHPTimer::GetSeconds(endTime - beginTime)); // fold the timing into s so the warm-up loop is not optimized away
+
+ // Measure the CPU performance
+ // C = A * B with injected dependency to s
+ NHPTimer::GetTime(&beginTime);
+ for (ui64 y = 0; y < Size; ++y) {
+ for (ui64 x = 0; x < Size; ++x) {
+ for (ui64 i = 0; i < Size; ++i) {
+ s += data[y * Size + i] * data[Size * Size + i * Size + x];
+ }
+ data[2 * Size * Size + y * Size + x] = s;
+ s = 0;
+ }
+ }
+ for (ui64 idx = 0; idx < data.size(); ++idx) {
+ s += data[idx];
+ }
+ NHPTimer::GetTime(&endTime);
+ // Prepare the result; the (s & 1) term injects a dependency on s so the
+ // measured computation cannot be optimized away
+ double d = 1000000000.0 * (NHPTimer::GetSeconds(endTime - beginTime) + 0.000000001 * (s & 1));
+ return (ui64)d;
+ }
+
+ void HandlePing(TEvPing::TPtr &ev, const TActorContext &ctx)
+ {
+ const auto now = ctx.Now();
+ const double hpNow = Timer.Passed();
+ const auto& e = *ev->Get();
+ const double passedTime = hpNow - e.TimeStart;
+ const ui64 delayUs = passedTime > 0.0 ? static_cast<ui64>(passedTime * 1e6) : 0;
+
+ *Counter = SlidingWindow.Update(delayUs, now);
+
+ ui64 d = MeasureTaskDurationNs();
+ auto res = CalculationSlidingWindow.Update({1, d}, now);
+ *CalculationTimeCounter = double(res.Sum) / double(res.Count + 1); // +1 guards against an empty window
+
+ SchedulePing(ctx, hpNow);
+ }
+
+private:
+ void SchedulePing(const TActorContext &ctx, double hpNow) const
+ {
+ ctx.Schedule(SendInterval, new TEvPing(hpNow));
+ }
+};
+
+} // namespace
+
+IActor* CreateSelfPingActor(
+ TDuration sendInterval,
+ const NMonitoring::TDynamicCounters::TCounterPtr& counter,
+ const NMonitoring::TDynamicCounters::TCounterPtr& calculationTimeCounter)
+{
+ return new TSelfPingActor(sendInterval, counter, calculationTimeCounter);
+}
+
+} // NActors
diff --git a/library/cpp/actors/helpers/selfping_actor.h b/library/cpp/actors/helpers/selfping_actor.h
new file mode 100644
index 0000000000..d7d07f9fa8
--- /dev/null
+++ b/library/cpp/actors/helpers/selfping_actor.h
@@ -0,0 +1,13 @@
+#pragma once
+
+#include <library/cpp/actors/core/actor.h>
+#include <library/cpp/monlib/dynamic_counters/counters.h>
+
+namespace NActors {
+
+IActor* CreateSelfPingActor(
+ TDuration sendInterval,
+ const NMonitoring::TDynamicCounters::TCounterPtr& counter,
+ const NMonitoring::TDynamicCounters::TCounterPtr& calculationTimeCounter);
+
+} // NActors
diff --git a/library/cpp/actors/helpers/selfping_actor_ut.cpp b/library/cpp/actors/helpers/selfping_actor_ut.cpp
new file mode 100644
index 0000000000..459635fa24
--- /dev/null
+++ b/library/cpp/actors/helpers/selfping_actor_ut.cpp
@@ -0,0 +1,45 @@
+#include "selfping_actor.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+#include <library/cpp/actors/testlib/test_runtime.h>
+
+namespace NActors {
+namespace Tests {
+
+THolder<TTestActorRuntimeBase> CreateRuntime() {
+ auto runtime = MakeHolder<TTestActorRuntimeBase>();
+ runtime->SetScheduledEventFilter([](auto&&, auto&&, auto&&, auto&&) { return false; });
+ runtime->Initialize();
+ return runtime;
+}
+
+Y_UNIT_TEST_SUITE(TSelfPingTest) {
+ Y_UNIT_TEST(Basic)
+ {
+ auto runtime = CreateRuntime();
+
+ //const TActorId sender = runtime.AllocateEdgeActor();
+
+ NMonitoring::TDynamicCounters::TCounterPtr counter(new NMonitoring::TCounterForPtr());
+ NMonitoring::TDynamicCounters::TCounterPtr counter2(new NMonitoring::TCounterForPtr());
+
+ auto actor = CreateSelfPingActor(
+ TDuration::MilliSeconds(100), // sendInterval (unused in test)
+ counter, counter2);
+
+ UNIT_ASSERT_VALUES_EQUAL(counter->Val(), 0);
+ UNIT_ASSERT_VALUES_EQUAL(counter2->Val(), 0);
+
+ const TActorId actorId = runtime->Register(actor);
+ Y_UNUSED(actorId);
+
+ //runtime.Send(new IEventHandle(actorId, sender, new TEvSelfPing::TEvPing(0.0)));
+
+ // TODO check after events are handled
+ //Sleep(TDuration::Seconds(1));
+ //UNIT_ASSERT((intmax_t)counter->Val() >= (intmax_t)Delay.MicroSeconds());
+ }
+}
+
+} // namespace Tests
+} // namespace NActors
diff --git a/library/cpp/actors/helpers/ut/ya.make b/library/cpp/actors/helpers/ut/ya.make
new file mode 100644
index 0000000000..cba4d6d1d9
--- /dev/null
+++ b/library/cpp/actors/helpers/ut/ya.make
@@ -0,0 +1,36 @@
+UNITTEST_FOR(library/cpp/actors/helpers)
+
+OWNER(
+ alexvru
+ g:kikimr
+)
+
+FORK_SUBTESTS()
+IF (SANITIZER_TYPE)
+ SIZE(LARGE)
+ TIMEOUT(1200)
+ TAG(ya:fat)
+ SPLIT_FACTOR(20)
+ REQUIREMENTS(
+ ram:32
+ )
+ELSE()
+ SIZE(MEDIUM)
+ TIMEOUT(600)
+ REQUIREMENTS(
+ ram:16
+ )
+ENDIF()
+
+
+PEERDIR(
+ library/cpp/actors/interconnect
+ library/cpp/actors/testlib
+ library/cpp/actors/core
+)
+
+SRCS(
+ selfping_actor_ut.cpp
+)
+
+END()
diff --git a/library/cpp/actors/helpers/ya.make b/library/cpp/actors/helpers/ya.make
new file mode 100644
index 0000000000..d8771179de
--- /dev/null
+++ b/library/cpp/actors/helpers/ya.make
@@ -0,0 +1,25 @@
+LIBRARY()
+
+OWNER(g:kikimr)
+
+SRCS(
+ activeactors.cpp
+ activeactors.h
+ flow_controlled_queue.cpp
+ flow_controlled_queue.h
+ future_callback.h
+ mon_histogram_helper.h
+ selfping_actor.cpp
+)
+
+PEERDIR(
+ library/cpp/actors/core
+ library/cpp/monlib/dynamic_counters
+)
+
+END()
+
+RECURSE_FOR_TESTS(
+ ut
+)
+
diff --git a/library/cpp/actors/http/http.cpp b/library/cpp/actors/http/http.cpp
new file mode 100644
index 0000000000..7125f9d8b0
--- /dev/null
+++ b/library/cpp/actors/http/http.cpp
@@ -0,0 +1,653 @@
+#include "http.h"
+#include <library/cpp/string_utils/quote/quote.h>
+
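+// Concatenation of TStringBufs that are physically adjacent within the same
+// underlying buffer: the two views are merged without copying any data.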
+inline TStringBuf operator +(TStringBuf l, TStringBuf r) {
+ if (l.empty()) {
+ return r;
+ }
+ if (r.empty()) {
+ return l;
+ }
+ if (l.end() == r.begin()) {
+ return TStringBuf(l.data(), l.size() + r.size());
+ }
+ if (r.end() == l.begin()) {
+ return TStringBuf(r.data(), l.size() + r.size());
+ }
+ Y_FAIL("oops");
+ return TStringBuf();
+}
+
+inline TStringBuf operator +=(TStringBuf& l, TStringBuf r) {
+ return l = l + r;
+}
+
+namespace NHttp {
+
+template <> TStringBuf THttpRequest::GetName<&THttpRequest::Host>() { return "Host"; }
+template <> TStringBuf THttpRequest::GetName<&THttpRequest::Accept>() { return "Accept"; }
+template <> TStringBuf THttpRequest::GetName<&THttpRequest::Connection>() { return "Connection"; }
+template <> TStringBuf THttpRequest::GetName<&THttpRequest::ContentType>() { return "Content-Type"; }
+template <> TStringBuf THttpRequest::GetName<&THttpRequest::ContentLength>() { return "Content-Length"; }
+template <> TStringBuf THttpRequest::GetName<&THttpRequest::TransferEncoding>() { return "Transfer-Encoding"; }
+
+const TMap<TStringBuf, TStringBuf THttpRequest::*, TLessNoCase> THttpRequest::HeadersLocation = {
+ { THttpRequest::GetName<&THttpRequest::Host>(), &THttpRequest::Host },
+ { THttpRequest::GetName<&THttpRequest::Accept>(), &THttpRequest::Accept },
+ { THttpRequest::GetName<&THttpRequest::Connection>(), &THttpRequest::Connection },
+ { THttpRequest::GetName<&THttpRequest::ContentType>(), &THttpRequest::ContentType },
+ { THttpRequest::GetName<&THttpRequest::ContentLength>(), &THttpRequest::ContentLength },
+ { THttpRequest::GetName<&THttpRequest::TransferEncoding>(), &THttpRequest::TransferEncoding },
+};
+
+template <> TStringBuf THttpResponse::GetName<&THttpResponse::Connection>() { return "Connection"; }
+template <> TStringBuf THttpResponse::GetName<&THttpResponse::ContentType>() { return "Content-Type"; }
+template <> TStringBuf THttpResponse::GetName<&THttpResponse::ContentLength>() { return "Content-Length"; }
+template <> TStringBuf THttpResponse::GetName<&THttpResponse::TransferEncoding>() { return "Transfer-Encoding"; }
+template <> TStringBuf THttpResponse::GetName<&THttpResponse::LastModified>() { return "Last-Modified"; }
+template <> TStringBuf THttpResponse::GetName<&THttpResponse::ContentEncoding>() { return "Content-Encoding"; }
+
+const TMap<TStringBuf, TStringBuf THttpResponse::*, TLessNoCase> THttpResponse::HeadersLocation = {
+ { THttpResponse::GetName<&THttpResponse::Connection>(), &THttpResponse::Connection },
+ { THttpResponse::GetName<&THttpResponse::ContentType>(), &THttpResponse::ContentType },
+ { THttpResponse::GetName<&THttpResponse::ContentLength>(), &THttpResponse::ContentLength },
+ { THttpResponse::GetName<&THttpResponse::TransferEncoding>(), &THttpResponse::TransferEncoding },
+ { THttpResponse::GetName<&THttpResponse::LastModified>(), &THttpResponse::LastModified },
+ { THttpResponse::GetName<&THttpResponse::ContentEncoding>(), &THttpResponse::ContentEncoding }
+};
+
+void THttpRequest::Clear() {
+ // reset all fields to their defaults without reallocating:
+ this->~THttpRequest(); // trivial destructor, effectively a no-op
+ new (this) THttpRequest(); // placement-new re-runs the default member initializers
+}
+
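+// Incremental request parser: Advance() consumes the next len bytes of the
+// buffer and drives the parse-stage state machine; parsed fields end up as
+// TStringBufs pointing into the buffer itself.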
+template <>
+void THttpParser<THttpRequest, TSocketBuffer>::Advance(size_t len) {
+ TStringBuf data(Pos(), len);
+ while (!data.empty()) {
+ if (Stage != EParseStage::Error) {
+ LastSuccessStage = Stage;
+ }
+ switch (Stage) {
+ case EParseStage::Method: {
+ if (ProcessData(Method, data, ' ', MaxMethodSize)) {
+ Stage = EParseStage::URL;
+ }
+ break;
+ }
+ case EParseStage::URL: {
+ if (ProcessData(URL, data, ' ', MaxURLSize)) {
+ Stage = EParseStage::Protocol;
+ }
+ break;
+ }
+ case EParseStage::Protocol: {
+ if (ProcessData(Protocol, data, '/', MaxProtocolSize)) {
+ Stage = EParseStage::Version;
+ }
+ break;
+ }
+ case EParseStage::Version: {
+ if (ProcessData(Version, data, "\r\n", MaxVersionSize)) {
+ Stage = EParseStage::Header;
+ Headers = data;
+ }
+ break;
+ }
+ case EParseStage::Header: {
+ if (ProcessData(Header, data, "\r\n", MaxHeaderSize)) {
+ if (Header.empty()) {
+ Headers = TStringBuf(Headers.data(), data.begin() - Headers.begin());
+ if (HaveBody()) {
+ Stage = EParseStage::Body;
+ } else {
+ Stage = EParseStage::Done;
+ }
+ } else {
+ ProcessHeader(Header);
+ }
+ }
+ break;
+ }
+ case EParseStage::Body: {
+ if (!ContentLength.empty()) {
+ if (ProcessData(Content, data, FromString(ContentLength))) {
+ Body = Content;
+ Stage = EParseStage::Done;
+ }
+ } else if (TransferEncoding == "chunked") {
+ Stage = EParseStage::ChunkLength;
+ } else {
+ // Invalid body encoding
+ Stage = EParseStage::Error;
+ }
+ break;
+ }
+ case EParseStage::ChunkLength: {
+ if (ProcessData(Line, data, "\r\n", MaxChunkLengthSize)) {
+ if (!Line.empty()) {
+ ChunkLength = ParseHex(Line);
+ if (ChunkLength <= MaxChunkSize) {
+ ContentSize = Content.size() + ChunkLength;
+ if (ContentSize <= MaxChunkContentSize) {
+ Stage = EParseStage::ChunkData;
+ Line.Clear();
+ } else {
+ // Invalid chunk content length
+ Stage = EParseStage::Error;
+ }
+ } else {
+ // Invalid chunk length
+ Stage = EParseStage::Error;
+ }
+ } else {
+ // Invalid body encoding
+ Stage = EParseStage::Error;
+ }
+ }
+ break;
+ }
+ case EParseStage::ChunkData: {
+ if (!IsError()) {
+ if (ProcessData(Content, data, ContentSize)) {
+ if (ProcessData(Line, data, 2)) {
+ if (Line == "\r\n") {
+ if (ChunkLength == 0) {
+ Body = Content;
+ Stage = EParseStage::Done;
+ } else {
+ Stage = EParseStage::ChunkLength;
+ }
+ Line.Clear();
+ } else {
+ // Invalid body encoding
+ Stage = EParseStage::Error;
+ }
+ }
+ }
+ }
+ break;
+ }
+
+ case EParseStage::Done:
+ case EParseStage::Error: {
+ data.Clear();
+ break;
+ }
+ default:
+ Y_FAIL("Invalid processing sequence");
+ break;
+ }
+ }
+ TSocketBuffer::Advance(len);
+}
+
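+// A request starts with the method token, a response with the protocol token.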
+template <>
+THttpParser<THttpRequest, TSocketBuffer>::EParseStage THttpParser<THttpRequest, TSocketBuffer>::GetInitialStage() {
+ return EParseStage::Method;
+}
+
+template <>
+THttpParser<THttpResponse, TSocketBuffer>::EParseStage THttpParser<THttpResponse, TSocketBuffer>::GetInitialStage() {
+ return EParseStage::Protocol;
+}
+
+void THttpResponse::Clear() {
+ // reset all fields to their defaults without reallocating:
+ this->~THttpResponse(); // trivial destructor, effectively a no-op
+ new (this) THttpResponse(); // placement-new re-runs the default member initializers
+}
+
+template <>
+void THttpParser<THttpResponse, TSocketBuffer>::Advance(size_t len) {
+ TStringBuf data(Pos(), len);
+ while (!data.empty()) {
+ if (Stage != EParseStage::Error) {
+ LastSuccessStage = Stage;
+ }
+ switch (Stage) {
+ case EParseStage::Protocol: {
+ if (ProcessData(Protocol, data, '/', MaxProtocolSize)) {
+ Stage = EParseStage::Version;
+ }
+ break;
+ }
+ case EParseStage::Version: {
+ if (ProcessData(Version, data, ' ', MaxVersionSize)) {
+ Stage = EParseStage::Status;
+ }
+ break;
+ }
+ case EParseStage::Status: {
+ if (ProcessData(Status, data, ' ', MaxStatusSize)) {
+ Stage = EParseStage::Message;
+ }
+ break;
+ }
+ case EParseStage::Message: {
+ if (ProcessData(Message, data, "\r\n", MaxMessageSize)) {
+ Stage = EParseStage::Header;
+ Headers = TStringBuf(data.data(), size_t(0));
+ }
+ break;
+ }
+ case EParseStage::Header: {
+ if (ProcessData(Header, data, "\r\n", MaxHeaderSize)) {
+ if (Header.empty()) {
+ if (HaveBody() && (ContentLength.empty() || ContentLength != "0")) {
+ Stage = EParseStage::Body;
+ } else {
+ Stage = EParseStage::Done;
+ }
+ } else {
+ ProcessHeader(Header);
+ }
+ Headers = TStringBuf(Headers.data(), data.data() - Headers.data());
+ }
+ break;
+ }
+ case EParseStage::Body: {
+ if (!ContentLength.empty()) {
+ if (ProcessData(Body, data, FromString(ContentLength))) {
+ Stage = EParseStage::Done;
+ }
+ } else if (TransferEncoding == "chunked") {
+ Stage = EParseStage::ChunkLength;
+ } else {
+ // Invalid body encoding
+ Stage = EParseStage::Error;
+ }
+ break;
+ }
+ case EParseStage::ChunkLength: {
+ if (ProcessData(Line, data, "\r\n", MaxChunkLengthSize)) {
+ if (!Line.empty()) {
+ ChunkLength = ParseHex(Line);
+ if (ChunkLength <= MaxChunkSize) {
+ ContentSize = Content.size() + ChunkLength;
+ if (ContentSize <= MaxChunkContentSize) {
+ Stage = EParseStage::ChunkData;
+ Line.Clear();
+ } else {
+ // Invalid chunk content length
+ Stage = EParseStage::Error;
+ }
+ } else {
+ // Invalid chunk length
+ Stage = EParseStage::Error;
+ }
+ } else {
+ // Invalid body encoding
+ Stage = EParseStage::Error;
+ }
+ }
+ break;
+ }
+ case EParseStage::ChunkData: {
+ if (!IsError()) {
+ if (ProcessData(Content, data, ContentSize)) {
+ if (ProcessData(Line, data, 2)) {
+ if (Line == "\r\n") {
+ if (ChunkLength == 0) {
+ Body = Content;
+ Stage = EParseStage::Done;
+ } else {
+ Stage = EParseStage::ChunkLength;
+ }
+ Line.Clear();
+ } else {
+ // Invalid body encoding
+ Stage = EParseStage::Error;
+ }
+ }
+ }
+ }
+ break;
+ }
+ case EParseStage::Done:
+ case EParseStage::Error:
+ data.Clear();
+ break;
+ default:
+ // Invalid processing sequence
+ Stage = EParseStage::Error;
+ break;
+ }
+ }
+ TSocketBuffer::Advance(len);
+}
+
+template <>
+void THttpParser<THttpResponse, TSocketBuffer>::ConnectionClosed() {
+ if (Stage == EParseStage::Done) {
+ return;
+ }
+ if (Stage == EParseStage::Body) {
+ // assume the body is terminated by connection close (e.g. no Content-Length given)
+ Stage = EParseStage::Done;
+ } else {
+ LastSuccessStage = Stage;
+ Stage = EParseStage::Error;
+ }
+}
+
+THttpOutgoingResponsePtr THttpIncomingRequest::CreateResponseString(TStringBuf data) {
+ THttpOutgoingResponsePtr response = new THttpOutgoingResponse(this);
+ response->Append(data);
+ response->Reparse();
+ return response;
+}
+
+THttpOutgoingResponsePtr THttpIncomingRequest::CreateResponseOK(TStringBuf body, TStringBuf contentType, TInstant lastModified) {
+ return CreateResponse("200", "OK", contentType, body, lastModified);
+}
+
+THttpOutgoingResponsePtr THttpIncomingRequest::CreateResponseBadRequest(TStringBuf html, TStringBuf contentType) {
+ if (html.empty() && IsError()) {
+ contentType = "text/plain";
+ html = GetErrorText();
+ }
+ return CreateResponse("400", "Bad Request", contentType, html);
+}
+
+THttpOutgoingResponsePtr THttpIncomingRequest::CreateResponseNotFound(TStringBuf html, TStringBuf contentType) {
+ return CreateResponse("404", "Not Found", contentType, html);
+}
+
+THttpOutgoingResponsePtr THttpIncomingRequest::CreateResponseServiceUnavailable(TStringBuf html, TStringBuf contentType) {
+ return CreateResponse("503", "Service Unavailable", contentType, html);
+}
+
+THttpOutgoingResponsePtr THttpIncomingRequest::CreateResponseGatewayTimeout(TStringBuf html, TStringBuf contentType) {
+ return CreateResponse("504", "Gateway Timeout", contentType, html);
+}
+
+THttpIncomingResponse::THttpIncomingResponse(THttpOutgoingRequestPtr request)
+ : Request(request)
+{}
+
+THttpOutgoingResponsePtr THttpIncomingRequest::CreateResponse(TStringBuf status, TStringBuf message, TStringBuf contentType, TStringBuf body, TInstant lastModified) {
+ TStringBuf version = Version;
+ if (version != "1.0" && version != "1.1") {
+ version = "1.1";
+ }
+ THttpOutgoingResponsePtr response = new THttpOutgoingResponse(this, "HTTP", version, status, message);
+ response->Set<&THttpResponse::Connection>(GetConnection());
+ if (!WorkerName.empty()) {
+ response->Set("X-Worker-Name", WorkerName);
+ }
+ if (!contentType.empty() && !body.empty()) {
+ response->Set<&THttpResponse::ContentType>(contentType);
+ }
+ if (lastModified) {
+ response->Set<&THttpResponse::LastModified>(lastModified.FormatGmTime("%a, %d %b %Y %H:%M:%S GMT"));
+ }
+ if (response->IsNeedBody() || !body.empty()) {
+ if (Method == "HEAD") {
+ response->Set<&THttpResponse::ContentLength>(ToString(body.size()));
+ } else {
+ response->Set<&THttpResponse::Body>(body);
+ }
+ }
+ return response;
+}
+
+THttpIncomingRequestPtr THttpIncomingRequest::Duplicate() {
+ THttpIncomingRequestPtr request = new THttpIncomingRequest(*this);
+ request->Reparse();
+ request->Timer.Reset();
+ return request;
+}
+
+THttpIncomingResponsePtr THttpIncomingResponse::Duplicate(THttpOutgoingRequestPtr request) {
+ THttpIncomingResponsePtr response = new THttpIncomingResponse(*this);
+ response->Reparse();
+ response->Request = request;
+ return response;
+}
+
+THttpOutgoingResponsePtr THttpOutgoingResponse::Duplicate(THttpIncomingRequestPtr request) {
+ THttpOutgoingResponsePtr response = new THttpOutgoingResponse(*this);
+ response->Reparse();
+ response->Request = request;
+ return response;
+}
+
+
+THttpOutgoingResponsePtr THttpIncomingResponse::Reverse(THttpIncomingRequestPtr request) {
+ THttpOutgoingResponsePtr response = new THttpOutgoingResponse(request);
+ response->Assign(Data(), Size());
+ response->Reparse();
+ return response;
+}
+
+THttpOutgoingRequest::THttpOutgoingRequest(TStringBuf method, TStringBuf scheme, TStringBuf host, TStringBuf uri, TStringBuf protocol, TStringBuf version) {
+ Secure = (scheme == "https");
+ TString urie = UrlEscapeRet(uri);
+ InitRequest(method, urie, protocol, version);
+ if (host) {
+ Set<&THttpRequest::Host>(host);
+ }
+}
+
+THttpOutgoingRequest::THttpOutgoingRequest(TStringBuf method, TStringBuf url, TStringBuf protocol, TStringBuf version) {
+ TStringBuf scheme, host, uri;
+ if (!CrackURL(url, scheme, host, uri)) {
+ Y_FAIL("Invalid URL specified");
+ }
+ if (!scheme.empty() && scheme != "http" && scheme != "https") {
+ Y_FAIL("Invalid URL specified");
+ }
+ Secure = (scheme == "https");
+ TString urie = UrlEscapeRet(uri);
+ InitRequest(method, urie, protocol, version);
+ if (host) {
+ Set<&THttpRequest::Host>(host);
+ }
+}
+
+THttpOutgoingRequestPtr THttpOutgoingRequest::CreateRequestString(const TString& data) {
+ THttpOutgoingRequestPtr request = new THttpOutgoingRequest();
+ request->Assign(data.data(), data.size());
+ request->Reparse();
+ return request;
+}
+
+THttpOutgoingRequestPtr THttpOutgoingRequest::CreateRequestGet(TStringBuf url) {
+ return CreateRequest("GET", url);
+}
+
+THttpOutgoingRequestPtr THttpOutgoingRequest::CreateRequestGet(TStringBuf host, TStringBuf uri) {
+ return CreateHttpRequest("GET", host, uri);
+}
+
+THttpOutgoingRequestPtr THttpOutgoingRequest::CreateRequestPost(TStringBuf url, TStringBuf contentType, TStringBuf body) {
+ return CreateRequest("POST", url, contentType, body);
+}
+
+THttpOutgoingRequestPtr THttpOutgoingRequest::CreateRequestPost(TStringBuf host, TStringBuf uri, TStringBuf contentType, TStringBuf body) {
+ return CreateHttpRequest("POST", host, uri, contentType, body);
+}
+
+THttpOutgoingRequestPtr THttpOutgoingRequest::CreateRequest(TStringBuf method, TStringBuf url, TStringBuf contentType, TStringBuf body) {
+ THttpOutgoingRequestPtr request = new THttpOutgoingRequest(method, url, "HTTP", "1.1");
+ request->Set<&THttpRequest::Accept>("*/*");
+ if (!contentType.empty()) {
+ request->Set<&THttpRequest::ContentType>(contentType);
+ request->Set<&THttpRequest::Body>(body);
+ }
+ return request;
+}
+
+THttpOutgoingRequestPtr THttpOutgoingRequest::CreateHttpRequest(TStringBuf method, TStringBuf host, TStringBuf uri, TStringBuf contentType, TStringBuf body) {
+ THttpOutgoingRequestPtr request = new THttpOutgoingRequest(method, "http", host, uri, "HTTP", "1.1");
+ request->Set<&THttpRequest::Accept>("*/*");
+ if (!contentType.empty()) {
+ request->Set<&THttpRequest::ContentType>(contentType);
+ request->Set<&THttpRequest::Body>(body);
+ }
+ return request;
+}
+
+THttpOutgoingRequestPtr THttpOutgoingRequest::Duplicate() {
+ THttpOutgoingRequestPtr request = new THttpOutgoingRequest(*this);
+ request->Reparse();
+ return request;
+}
+
+THttpOutgoingResponse::THttpOutgoingResponse(THttpIncomingRequestPtr request)
+ : Request(request)
+{}
+
+THttpOutgoingResponse::THttpOutgoingResponse(THttpIncomingRequestPtr request, TStringBuf protocol, TStringBuf version, TStringBuf status, TStringBuf message)
+ : Request(request)
+{
+ InitResponse(protocol, version, status, message);
+}
+
+const size_t THttpConfig::BUFFER_MIN_STEP;
+const TDuration THttpConfig::CONNECTION_TIMEOUT;
+
+TUrlParameters::TUrlParameters(TStringBuf url) {
+ TStringBuf base;
+ TStringBuf params;
+ if (url.TrySplit('?', base, params)) {
+ for (TStringBuf param = params.NextTok('&'); !param.empty(); param = params.NextTok('&')) {
+ TStringBuf name = param.NextTok('=');
+ Parameters[name] = param;
+ }
+ }
+}
+
+TString TUrlParameters::operator [](TStringBuf name) const {
+ TString value(Get(name));
+ CGIUnescape(value);
+ return value;
+}
+
+bool TUrlParameters::Has(TStringBuf name) const {
+ return Parameters.count(name) != 0;
+}
+
+TStringBuf TUrlParameters::Get(TStringBuf name) const {
+ auto it = Parameters.find(name);
+ if (it != Parameters.end()) {
+ return it->second;
+ }
+ return TStringBuf();
+}
+
+TString TUrlParameters::Render() const {
+ TStringBuilder parameters;
+ for (const auto& parameter : Parameters) {
+ if (parameters.empty()) {
+ parameters << '?';
+ } else {
+ parameters << '&';
+ }
+ parameters << parameter.first;
+ parameters << '=';
+ parameters << parameter.second;
+ }
+ return parameters;
+}
+
+TCookies::TCookies(TStringBuf cookie) {
+ for (TStringBuf param = cookie.NextTok(';'); !param.empty(); param = cookie.NextTok(';')) {
+ param.SkipPrefix(" ");
+ TStringBuf name = param.NextTok('=');
+ Cookies[name] = param;
+ }
+}
+
+TStringBuf TCookies::operator [](TStringBuf name) const {
+ return Get(name);
+}
+
+bool TCookies::Has(TStringBuf name) const {
+ return Cookies.count(name) != 0;
+}
+
+TStringBuf TCookies::Get(TStringBuf name) const {
+ auto it = Cookies.find(name);
+ if (it != Cookies.end()) {
+ return it->second;
+ }
+ return TStringBuf();
+}
+
+TString TCookies::Render() const {
+ TStringBuilder cookies;
+ for (const auto& cookie : Cookies) {
+ if (!cookies.empty()) {
+ cookies << ' ';
+ }
+ cookies << cookie.first;
+ cookies << '=';
+ cookies << cookie.second;
+ cookies << ';';
+ }
+ return cookies;
+}
+
+TCookiesBuilder::TCookiesBuilder()
+ : TCookies(TStringBuf())
+{}
+
+void TCookiesBuilder::Set(TStringBuf name, TStringBuf data) {
+ Data.emplace_back(name, data);
+ Cookies[Data.back().first] = Data.back().second;
+}
+
+THeaders::THeaders(TStringBuf headers) {
+ for (TStringBuf param = headers.NextTok("\r\n"); !param.empty(); param = headers.NextTok("\r\n")) {
+ TStringBuf name = param.NextTok(":");
+ param.SkipPrefix(" ");
+ Headers[name] = param;
+ }
+}
+
+TStringBuf THeaders::operator [](TStringBuf name) const {
+ return Get(name);
+}
+
+bool THeaders::Has(TStringBuf name) const {
+ return Headers.count(name) != 0;
+}
+
+TStringBuf THeaders::Get(TStringBuf name) const {
+ auto it = Headers.find(name);
+ if (it != Headers.end()) {
+ return it->second;
+ }
+ return TStringBuf();
+}
+
+TString THeaders::Render() const {
+ TStringBuilder headers;
+ for (const auto& header : Headers) {
+ headers << header.first;
+ headers << ": ";
+ headers << header.second;
+ headers << "\r\n";
+ }
+ return headers;
+}
+
+THeadersBuilder::THeadersBuilder()
+ : THeaders(TStringBuf())
+{}
+
+THeadersBuilder::THeadersBuilder(const THeadersBuilder& builder) {
+ for (const auto& pr : builder.Headers) {
+ Set(pr.first, pr.second);
+ }
+}
+
+void THeadersBuilder::Set(TStringBuf name, TStringBuf data) {
+ Data.emplace_back(name, data);
+ Headers[Data.back().first] = Data.back().second;
+}
+
+}
diff --git a/library/cpp/actors/http/http.h b/library/cpp/actors/http/http.h
new file mode 100644
index 0000000000..96c5c1ec48
--- /dev/null
+++ b/library/cpp/actors/http/http.h
@@ -0,0 +1,703 @@
+#pragma once
+#include <util/datetime/base.h>
+#include <util/string/builder.h>
+#include <util/system/thread.h>
+#include <util/system/hp_timer.h>
+#include <util/generic/hash_set.h>
+#include <util/generic/buffer.h>
+#include <util/generic/intrlist.h>
+#include "http_config.h"
+
+// TODO(xenoxeno): hide in implementation
+template <typename Type>
+struct THash<TIntrusivePtr<Type>> {
+ size_t operator ()(const TIntrusivePtr<Type>& ptr) const { return reinterpret_cast<size_t>(ptr.Get()); }
+};
+
+template<>
+inline void Out<TSockAddrInet6>(IOutputStream& o, const TSockAddrInet6& x) {
+ o << x.ToString();
+}
+
+namespace NHttp {
+
+bool IsIPv6(const TString& host);
+bool CrackURL(TStringBuf url, TStringBuf& scheme, TStringBuf& host, TStringBuf& uri);
+void CrackAddress(const TString& address, TString& hostname, TIpPort& port);
+void TrimBegin(TStringBuf& target, char delim);
+void TrimEnd(TStringBuf& target, char delim);
+void Trim(TStringBuf& target, char delim);
+void TrimEnd(TString& target, char delim);
+
+struct TLessNoCase {
+ bool operator()(TStringBuf l, TStringBuf r) const {
+ auto ll = l.length();
+ auto rl = r.length();
+ if (ll != rl) {
+ return ll < rl;
+ }
+ return strnicmp(l.data(), r.data(), ll) < 0;
+ }
+};
+
+struct TUrlParameters {
+ THashMap<TStringBuf, TStringBuf> Parameters;
+
+ TUrlParameters(TStringBuf url);
+ TString operator [](TStringBuf name) const;
+ bool Has(TStringBuf name) const;
+ TStringBuf Get(TStringBuf name) const; // raw
+ TString Render() const;
+};
+
+struct TCookies {
+ THashMap<TStringBuf, TStringBuf> Cookies;
+
+ TCookies(TStringBuf cookie);
+ TCookies(const TCookies&) = delete;
+ TStringBuf operator [](TStringBuf name) const;
+ bool Has(TStringBuf name) const;
+ TStringBuf Get(TStringBuf name) const; // raw
+ TString Render() const;
+};
+
+struct TCookiesBuilder : TCookies {
+ TDeque<std::pair<TString, TString>> Data;
+
+ TCookiesBuilder();
+ void Set(TStringBuf name, TStringBuf data);
+};
+
+struct THeaders {
+ TMap<TStringBuf, TStringBuf, TLessNoCase> Headers;
+
+ THeaders() = default;
+ THeaders(TStringBuf headers);
+ THeaders(const THeaders&) = delete;
+ TStringBuf operator [](TStringBuf name) const;
+ bool Has(TStringBuf name) const;
+ TStringBuf Get(TStringBuf name) const; // raw
+ TString Render() const;
+};
+
+struct THeadersBuilder : THeaders {
+ TDeque<std::pair<TString, TString>> Data;
+
+ THeadersBuilder();
+ THeadersBuilder(const THeadersBuilder& builder);
+ void Set(TStringBuf name, TStringBuf data);
+};
+
+class TSocketBuffer : public TBuffer, public THttpConfig {
+public:
+ TSocketBuffer()
+ : TBuffer(BUFFER_SIZE)
+ {}
+
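+ // Returns false when the buffer had to grow; existing pointers into the
+ // old storage are invalidated in that case.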
+ bool EnsureEnoughSpaceAvailable(size_t need) {
+ size_t avail = Avail();
+ if (avail < need) {
+ Reserve(Capacity() + std::max(need, BUFFER_MIN_STEP));
+ return false;
+ }
+ return true;
+ }
+};
+
+class THttpRequest {
+public:
+ TStringBuf Method;
+ TStringBuf URL;
+ TStringBuf Protocol;
+ TStringBuf Version;
+ TStringBuf Headers;
+
+ TStringBuf Host;
+ TStringBuf Accept;
+ TStringBuf Connection;
+ TStringBuf ContentType;
+ TStringBuf ContentLength;
+ TStringBuf TransferEncoding;
+
+ TStringBuf Body;
+
+ static const TMap<TStringBuf, TStringBuf THttpRequest::*, TLessNoCase> HeadersLocation;
+
+ template <TStringBuf THttpRequest::* Header>
+ static TStringBuf GetName();
+ void Clear();
+};
+
+class THttpResponse {
+public:
+ TStringBuf Protocol;
+ TStringBuf Version;
+ TStringBuf Status;
+ TStringBuf Message;
+ TStringBuf Headers;
+
+ TStringBuf Connection;
+ TStringBuf ContentType;
+ TStringBuf ContentLength;
+ TStringBuf TransferEncoding;
+ TStringBuf LastModified;
+ TStringBuf ContentEncoding;
+
+ TStringBuf Body;
+
+ static const TMap<TStringBuf, TStringBuf THttpResponse::*, TLessNoCase> HeadersLocation;
+
+ template <TStringBuf THttpResponse::* Header>
+ static TStringBuf GetName();
+ void Clear();
+};
+
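+// Zero-copy incremental HTTP parser over a growable buffer: all parsed fields
+// are TStringBufs into the buffer, so every reallocation requires a Reparse().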
+template <typename HeaderType, typename BufferType>
+class THttpParser : public HeaderType, public BufferType {
+public:
+ enum class EParseStage : ui8 {
+ Method,
+ URL,
+ Protocol,
+ Version,
+ Status,
+ Message,
+ Header,
+ Body,
+ ChunkLength,
+ ChunkData,
+ Done,
+ Error,
+ };
+
+ static constexpr size_t MaxMethodSize = 6;
+ static constexpr size_t MaxURLSize = 1024;
+ static constexpr size_t MaxProtocolSize = 4;
+ static constexpr size_t MaxVersionSize = 4;
+ static constexpr size_t MaxStatusSize = 3;
+ static constexpr size_t MaxMessageSize = 1024;
+ static constexpr size_t MaxHeaderSize = 8192;
+ static constexpr size_t MaxChunkLengthSize = 8;
+ static constexpr size_t MaxChunkSize = 256 * 1024 * 1024;
+ static constexpr size_t MaxChunkContentSize = 1 * 1024 * 1024 * 1024;
+
+ EParseStage Stage;
+ EParseStage LastSuccessStage;
+ TStringBuf Line;
+ TStringBuf& Header = Line;
+ size_t ChunkLength = 0;
+ size_t ContentSize = 0;
+ TString Content;
+
+ THttpParser(const THttpParser& src)
+ : HeaderType(src)
+ , BufferType(src)
+ , Stage(src.Stage)
+ , LastSuccessStage(src.LastSuccessStage)
+ , Line()
+ , Header(Line)
+ , ChunkLength(src.ChunkLength)
+ , ContentSize(src.ContentSize)
+ , Content(src.Content)
+ {}
+
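+ // Appends bytes from source to target until the delimiter is reached and
+ // returns true once the field is complete; exceeding maxLen switches the
+ // parser into the Error stage.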
+ template <typename StringType>
+ bool ProcessData(StringType& target, TStringBuf& source, char delim, size_t maxLen) {
+ TStringBuf maxSource(source.substr(0, maxLen + 1 - target.size()));
+ size_t pos = maxSource.find(delim);
+ target += maxSource.substr(0, pos);
+ source.Skip(pos);
+ if (target.size() > maxLen) {
+ Stage = EParseStage::Error;
+ return false;
+ }
+ if (!source.empty() && *source.begin() == delim) {
+ source.Skip(1);
+ }
+ return pos != TStringBuf::npos;
+ }
+
+ template <typename StringType>
+ bool ProcessData(StringType& target, TStringBuf& source, TStringBuf delim, size_t maxLen) {
+ if (delim.empty()) {
+ return false;
+ }
+ if (delim.size() == 1) {
+ return ProcessData(target, source, delim[0], maxLen);
+ }
+ if (ProcessData(target, source, delim.back(), maxLen + 1)) {
+ for (signed i = delim.size() - 2; i >= 0; --i) {
+ TrimEnd(target, delim[i]);
+ }
+ return true;
+ }
+ return false;
+ }
+
+ template <typename StringType>
+ bool ProcessData(StringType& target, TStringBuf& source, size_t size) {
+ TStringBuf maxSource(source.substr(0, size - target.size()));
+ target += maxSource;
+ source.Skip(maxSource.size());
+ if (target.size() > size && !source.empty()) {
+ Stage = EParseStage::Error;
+ return false;
+ }
+ return target.size() == size;
+ }
+
+ void ProcessHeader(TStringBuf& header) {
+ TStringBuf name = header.NextTok(':');
+ TrimBegin(name, ' ');
+ TStringBuf value = header;
+ Trim(value, ' ');
+ auto cit = HeaderType::HeadersLocation.find(name);
+ if (cit != HeaderType::HeadersLocation.end()) {
+ this->*cit->second = value;
+ }
+ header.Clear();
+ }
+
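+ // Parses a hexadecimal chunk-size token: ';' starts chunk extensions and
+ // ends the number, whitespace is skipped, any other character is an error.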
+ size_t ParseHex(TStringBuf value) {
+ size_t result = 0;
+ for (char ch : value) {
+ if (ch >= '0' && ch <= '9') {
+ result *= 16;
+ result += ch - '0';
+ } else if (ch >= 'a' && ch <= 'f') {
+ result *= 16;
+ result += 10 + ch - 'a';
+ } else if (ch >= 'A' && ch <= 'F') {
+ result *= 16;
+ result += 10 + ch - 'A';
+ } else if (ch == ';') {
+ break;
+ } else if (isspace(ch)) {
+ continue;
+ } else {
+ Stage = EParseStage::Error;
+ return 0;
+ }
+ }
+ return result;
+ }
+
+ void Advance(size_t len);
+ void ConnectionClosed();
+
+ void Clear() {
+ BufferType::Clear();
+ HeaderType::Clear();
+ Stage = GetInitialStage();
+ Line.Clear();
+ Content.clear();
+ }
+
+ bool IsReady() const {
+ return Stage == EParseStage::Done;
+ }
+
+ bool IsError() const {
+ return Stage == EParseStage::Error;
+ }
+
+ TStringBuf GetErrorText() const {
+ switch (LastSuccessStage) {
+ case EParseStage::Method:
+ return "Invalid http method";
+ case EParseStage::URL:
+ return "Invalid url";
+ case EParseStage::Protocol:
+ return "Invalid http protocol";
+ case EParseStage::Version:
+ return "Invalid http version";
+ case EParseStage::Status:
+ return "Invalid http status";
+ case EParseStage::Message:
+ return "Invalid http message";
+ case EParseStage::Header:
+ return "Invalid http header";
+ case EParseStage::Body:
+ return "Invalid content body";
+ case EParseStage::ChunkLength:
+ case EParseStage::ChunkData:
+ return "Broken chunked data";
+ case EParseStage::Done:
+ return "Everything is fine";
+ case EParseStage::Error:
+ return "Error on error"; // wat? ...because we don't want to include default label here
+ }
+ }
+
+ bool IsDone() const {
+ return IsReady() || IsError();
+ }
+
+ bool HaveBody() const {
+ return !HeaderType::ContentType.empty() || !HeaderType::ContentLength.empty() || !HeaderType::TransferEncoding.empty();
+ }
+
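+ // Growing the buffer moves the data, so after a reallocation Reparse()
+ // re-points all parsed TStringBufs at the new storage.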
+ bool EnsureEnoughSpaceAvailable(size_t need = BufferType::BUFFER_MIN_STEP) {
+ bool result = BufferType::EnsureEnoughSpaceAvailable(need);
+ if (!result && !BufferType::Empty()) {
+ Reparse();
+ }
+ return true;
+ }
+
+ void Reparse() {
+ size_t size = BufferType::Size();
+ Clear();
+ Advance(size);
+ }
+
+ TStringBuf GetRawData() const {
+ return TStringBuf(BufferType::Data(), BufferType::Size());
+ }
+
+ TString GetObfuscatedData() const {
+ THeaders headers(HeaderType::Headers);
+ TStringBuf authorization(headers["Authorization"]);
+ TStringBuf cookie(headers["Cookie"]);
+ TStringBuf x_ydb_auth_ticket(headers["x-ydb-auth-ticket"]);
+ TStringBuf x_yacloud_subjecttoken(headers["x-yacloud-subjecttoken"]);
+ TString data(GetRawData());
+ if (!authorization.empty()) {
+ auto pos = data.find(authorization);
+ if (pos != TString::npos) {
+ data.replace(pos, authorization.size(), TString("<obfuscated>"));
+ }
+ }
+ if (!cookie.empty()) {
+ auto pos = data.find(cookie);
+ if (pos != TString::npos) {
+ data.replace(pos, cookie.size(), TString("<obfuscated>"));
+ }
+ }
+ if (!x_ydb_auth_ticket.empty()) {
+ auto pos = data.find(x_ydb_auth_ticket);
+ if (pos != TString::npos) {
+ data.replace(pos, x_ydb_auth_ticket.size(), TString("<obfuscated>"));
+ }
+ }
+ if (!x_yacloud_subjecttoken.empty()) {
+ auto pos = data.find(x_yacloud_subjecttoken);
+ if (pos != TString::npos) {
+ data.replace(pos, x_yacloud_subjecttoken.size(), TString("<obfuscated>"));
+ }
+ }
+ return data;
+ }
+
+ static EParseStage GetInitialStage();
+
+ THttpParser()
+ : Stage(GetInitialStage())
+ , LastSuccessStage(Stage)
+ {}
+};
+
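+// In-place HTTP message builder; it maintains the same header fields as the
+// parser (TStringBufs into the output buffer), so a rendered message can be
+// cheaply re-parsed or duplicated.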
+template <typename HeaderType, typename BufferType>
+class THttpRenderer : public HeaderType, public BufferType {
+public:
+ enum class ERenderStage {
+ Init,
+ Header,
+ Body,
+ Done,
+ Error,
+ };
+
+ ERenderStage Stage = ERenderStage::Init;
+
+ void Append(TStringBuf text) {
+ EnsureEnoughSpaceAvailable(text.size());
+ BufferType::Append(text.data(), text.size());
+ }
+
+ void Append(char c) {
+ EnsureEnoughSpaceAvailable(sizeof(c));
+ BufferType::Append(c);
+ }
+
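+ // Appends the value to the buffer and points the corresponding parsed-field
+ // TStringBuf at the freshly written bytes.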
+ template <TStringBuf HeaderType::* string>
+ void AppendParsedValue(TStringBuf value) {
+ Append(value);
+ static_cast<HeaderType*>(this)->*string = TStringBuf(BufferType::Pos() - value.size(), value.size());
+ }
+
+ template <TStringBuf HeaderType::* name>
+ void Set(TStringBuf value) {
+ Y_VERIFY_DEBUG(Stage == ERenderStage::Header);
+ Append(HeaderType::template GetName<name>());
+ Append(": ");
+ AppendParsedValue<name>(value);
+ Append("\r\n");
+ HeaderType::Headers = TStringBuf(HeaderType::Headers.Data(), BufferType::Pos() - HeaderType::Headers.Data());
+ }
+
+ void Set(TStringBuf name, TStringBuf value) {
+ Y_VERIFY_DEBUG(Stage == ERenderStage::Header);
+ Append(name);
+ Append(": ");
+ Append(value);
+ Append("\r\n");
+ HeaderType::Headers = TStringBuf(HeaderType::Headers.Data(), BufferType::Pos() - HeaderType::Headers.Data());
+ }
+
+ void Set(const THeaders& headers) {
+ Y_VERIFY_DEBUG(Stage == ERenderStage::Header);
+ Append(headers.Render());
+ HeaderType::Headers = TStringBuf(HeaderType::Headers.Data(), BufferType::Pos() - HeaderType::Headers.Data());
+ }
+
+ //THttpRenderer(TStringBuf method, TStringBuf url, TStringBuf protocol, TStringBuf version); // request
+ void InitRequest(TStringBuf method, TStringBuf url, TStringBuf protocol, TStringBuf version) {
+ Y_VERIFY_DEBUG(Stage == ERenderStage::Init);
+ AppendParsedValue<&THttpRequest::Method>(method);
+ Append(' ');
+ AppendParsedValue<&THttpRequest::URL>(url);
+ Append(' ');
+ AppendParsedValue<&THttpRequest::Protocol>(protocol);
+ Append('/');
+ AppendParsedValue<&THttpRequest::Version>(version);
+ Append("\r\n");
+ Stage = ERenderStage::Header;
+ HeaderType::Headers = TStringBuf(BufferType::Pos(), size_t(0));
+ }
+
+ //THttpRenderer(TStringBuf protocol, TStringBuf version, TStringBuf status, TStringBuf message); // response
+ void InitResponse(TStringBuf protocol, TStringBuf version, TStringBuf status, TStringBuf message) {
+ Y_VERIFY_DEBUG(Stage == ERenderStage::Init);
+ AppendParsedValue<&THttpResponse::Protocol>(protocol);
+ Append('/');
+ AppendParsedValue<&THttpResponse::Version>(version);
+ Append(' ');
+ AppendParsedValue<&THttpResponse::Status>(status);
+ Append(' ');
+ AppendParsedValue<&THttpResponse::Message>(message);
+ Append("\r\n");
+ Stage = ERenderStage::Header;
+ HeaderType::Headers = TStringBuf(BufferType::Pos(), size_t(0));
+ }
+
+ void FinishHeader() {
+ Append("\r\n");
+ HeaderType::Headers = TStringBuf(HeaderType::Headers.Data(), BufferType::Pos() - HeaderType::Headers.Data());
+ Stage = ERenderStage::Body;
+ }
+
+ void SetBody(TStringBuf body) {
+ Y_VERIFY_DEBUG(Stage == ERenderStage::Header);
+ if (HeaderType::ContentLength.empty()) {
+ Set<&HeaderType::ContentLength>(ToString(body.size()));
+ }
+ FinishHeader();
+ AppendParsedValue<&HeaderType::Body>(body);
+ Stage = ERenderStage::Done;
+ }
+
+ bool IsDone() const {
+ return Stage == ERenderStage::Done;
+ }
+
+ void Finish() {
+ switch (Stage) {
+ case ERenderStage::Header:
+ FinishHeader();
+ break;
+ default:
+ break;
+ }
+ }
+
+ bool EnsureEnoughSpaceAvailable(size_t need = BufferType::BUFFER_MIN_STEP) {
+ bool result = BufferType::EnsureEnoughSpaceAvailable(need);
+ if (!result && !BufferType::Empty()) {
+ Reparse();
+ }
+ return true;
+ }
+
+ void Clear() {
+ BufferType::Clear();
+ HeaderType::Clear();
+ }
+
+ void Reparse() {
+ // move-magic
+ size_t size = BufferType::Size();
+ THttpParser<HeaderType, BufferType> parser;
+ // move the buffer to parser
+ static_cast<BufferType&>(parser) = std::move(static_cast<BufferType&>(*this));
+ // reparse
+ parser.Clear();
+ parser.Advance(size);
+ // move buffer and result back
+ static_cast<HeaderType&>(*this) = std::move(static_cast<HeaderType&>(parser));
+ static_cast<BufferType&>(*this) = std::move(static_cast<BufferType&>(parser));
+ switch (parser.Stage) {
+ case THttpParser<HeaderType, BufferType>::EParseStage::Method:
+ case THttpParser<HeaderType, BufferType>::EParseStage::URL:
+ case THttpParser<HeaderType, BufferType>::EParseStage::Protocol:
+ case THttpParser<HeaderType, BufferType>::EParseStage::Version:
+ case THttpParser<HeaderType, BufferType>::EParseStage::Status:
+ case THttpParser<HeaderType, BufferType>::EParseStage::Message:
+ Stage = ERenderStage::Init;
+ break;
+ case THttpParser<HeaderType, BufferType>::EParseStage::Header:
+ Stage = ERenderStage::Header;
+ break;
+ case THttpParser<HeaderType, BufferType>::EParseStage::Body:
+ case THttpParser<HeaderType, BufferType>::EParseStage::ChunkLength:
+ case THttpParser<HeaderType, BufferType>::EParseStage::ChunkData:
+ Stage = ERenderStage::Body;
+ break;
+ case THttpParser<HeaderType, BufferType>::EParseStage::Done:
+ Stage = ERenderStage::Done;
+ break;
+ case THttpParser<HeaderType, BufferType>::EParseStage::Error:
+ Stage = ERenderStage::Error;
+ break;
+ }
+ Y_VERIFY(size == BufferType::Size());
+ }
+
+ TStringBuf GetRawData() const {
+ return TStringBuf(BufferType::Data(), BufferType::Size());
+ }
+};
+
+template <>
+template <>
+inline void THttpRenderer<THttpResponse, TSocketBuffer>::Set<&THttpResponse::Body>(TStringBuf value) {
+ SetBody(value);
+}
+
+template <>
+template <>
+inline void THttpRenderer<THttpRequest, TSocketBuffer>::Set<&THttpRequest::Body>(TStringBuf value) {
+ SetBody(value);
+}
+
+class THttpIncomingRequest;
+using THttpIncomingRequestPtr = TIntrusivePtr<THttpIncomingRequest>;
+
+class THttpOutgoingResponse;
+using THttpOutgoingResponsePtr = TIntrusivePtr<THttpOutgoingResponse>;
+
+class THttpIncomingRequest :
+ public THttpParser<THttpRequest, TSocketBuffer>,
+ public TRefCounted<THttpIncomingRequest, TAtomicCounter> {
+public:
+ THttpConfig::SocketAddressType Address;
+ TString WorkerName;
+ THPTimer Timer;
+ bool Secure = false;
+
+ bool IsConnectionClose() const {
+ if (Connection.empty()) {
+ return Version == "1.0";
+ } else {
+ return Connection == "close";
+ }
+ }
+
+ TStringBuf GetConnection() const {
+ if (!Connection.empty()) {
+ return Connection;
+ }
+ return Version == "1.0" ? "close" : "keep-alive";
+ }
+
+ THttpOutgoingResponsePtr CreateResponseOK(TStringBuf body, TStringBuf contentType = "text/html", TInstant lastModified = TInstant());
+ THttpOutgoingResponsePtr CreateResponseString(TStringBuf data);
+ THttpOutgoingResponsePtr CreateResponseBadRequest(TStringBuf html = TStringBuf(), TStringBuf contentType = "text/html"); // 400
+ THttpOutgoingResponsePtr CreateResponseNotFound(TStringBuf html = TStringBuf(), TStringBuf contentType = "text/html"); // 404
+ THttpOutgoingResponsePtr CreateResponseServiceUnavailable(TStringBuf html = TStringBuf(), TStringBuf contentType = "text/html"); // 503
+ THttpOutgoingResponsePtr CreateResponseGatewayTimeout(TStringBuf html = TStringBuf(), TStringBuf contentType = "text/html"); // 504
+ THttpOutgoingResponsePtr CreateResponse(
+ TStringBuf status,
+ TStringBuf message,
+ TStringBuf contentType = TStringBuf(),
+ TStringBuf body = TStringBuf(),
+ TInstant lastModified = TInstant());
+
+ THttpIncomingRequestPtr Duplicate();
+};
+
+class THttpIncomingResponse;
+using THttpIncomingResponsePtr = TIntrusivePtr<THttpIncomingResponse>;
+
+class THttpOutgoingRequest;
+using THttpOutgoingRequestPtr = TIntrusivePtr<THttpOutgoingRequest>;
+
+class THttpIncomingResponse :
+ public THttpParser<THttpResponse, TSocketBuffer>,
+ public TRefCounted<THttpIncomingResponse, TAtomicCounter> {
+public:
+ THttpIncomingResponse(THttpOutgoingRequestPtr request);
+
+ THttpOutgoingRequestPtr GetRequest() const {
+ return Request;
+ }
+
+ THttpIncomingResponsePtr Duplicate(THttpOutgoingRequestPtr request);
+ THttpOutgoingResponsePtr Reverse(THttpIncomingRequestPtr request);
+
+protected:
+ THttpOutgoingRequestPtr Request;
+};
+
+class THttpOutgoingRequest :
+ public THttpRenderer<THttpRequest, TSocketBuffer>,
+ public TRefCounted<THttpOutgoingRequest, TAtomicCounter> {
+public:
+ THPTimer Timer;
+ bool Secure = false;
+
+ THttpOutgoingRequest() = default;
+ THttpOutgoingRequest(TStringBuf method, TStringBuf url, TStringBuf protocol, TStringBuf version);
+ THttpOutgoingRequest(TStringBuf method, TStringBuf scheme, TStringBuf host, TStringBuf uri, TStringBuf protocol, TStringBuf version);
+ static THttpOutgoingRequestPtr CreateRequestString(TStringBuf data);
+ static THttpOutgoingRequestPtr CreateRequestString(const TString& data);
+ static THttpOutgoingRequestPtr CreateRequestGet(TStringBuf url);
+ static THttpOutgoingRequestPtr CreateRequestGet(TStringBuf host, TStringBuf uri); // http only
+ static THttpOutgoingRequestPtr CreateRequestPost(TStringBuf url, TStringBuf contentType = {}, TStringBuf body = {});
+ static THttpOutgoingRequestPtr CreateRequestPost(TStringBuf host, TStringBuf uri, TStringBuf contentType, TStringBuf body); // http only
+ static THttpOutgoingRequestPtr CreateRequest(TStringBuf method, TStringBuf url, TStringBuf contentType = TStringBuf(), TStringBuf body = TStringBuf());
+ static THttpOutgoingRequestPtr CreateHttpRequest(TStringBuf method, TStringBuf host, TStringBuf uri, TStringBuf contentType = TStringBuf(), TStringBuf body = TStringBuf());
+ THttpOutgoingRequestPtr Duplicate();
+};
+
+class THttpOutgoingResponse :
+ public THttpRenderer<THttpResponse, TSocketBuffer>,
+ public TRefCounted<THttpOutgoingResponse, TAtomicCounter> {
+public:
+ THttpOutgoingResponse(THttpIncomingRequestPtr request);
+ THttpOutgoingResponse(THttpIncomingRequestPtr request, TStringBuf protocol, TStringBuf version, TStringBuf status, TStringBuf message);
+
+ bool IsConnectionClose() const {
+ if (!Connection.empty()) {
+ return Connection == "close";
+ } else {
+ return Request->IsConnectionClose();
+ }
+ }
+
+ bool IsNeedBody() const {
+ return Status != "204";
+ }
+
+ THttpIncomingRequestPtr GetRequest() const {
+ return Request;
+ }
+
+ THttpOutgoingResponsePtr Duplicate(THttpIncomingRequestPtr request);
+
+// temporarily left public so external code can perform cleanup
+//protected:
+ THttpIncomingRequestPtr Request;
+};
+
+}
diff --git a/library/cpp/actors/http/http_cache.cpp b/library/cpp/actors/http/http_cache.cpp
new file mode 100644
index 0000000000..27c4eeb6f3
--- /dev/null
+++ b/library/cpp/actors/http/http_cache.cpp
@@ -0,0 +1,599 @@
+#include "http.h"
+#include "http_proxy.h"
+#include "http_cache.h"
+#include <library/cpp/actors/core/actor_bootstrapped.h>
+#include <library/cpp/actors/core/executor_pool_basic.h>
+#include <library/cpp/actors/core/log.h>
+#include <library/cpp/actors/core/scheduler_basic.h>
+#include <library/cpp/actors/http/http.h>
+#include <library/cpp/digest/md5/md5.h>
+#include <util/digest/multi.h>
+#include <util/generic/queue.h>
+#include <util/string/cast.h>
+
+namespace NHttp {
+
+class THttpOutgoingCacheActor : public NActors::TActorBootstrapped<THttpOutgoingCacheActor>, THttpConfig {
+public:
+ using TBase = NActors::TActorBootstrapped<THttpOutgoingCacheActor>;
+ NActors::TActorId HttpProxyId;
+ TGetCachePolicy GetCachePolicy;
+ static constexpr TDuration RefreshTimeout = TDuration::Seconds(1);
+
+ struct TCacheKey {
+ TString Host;
+ TString URL;
+ TString Headers;
+
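+ // Implicit conversion to size_t provides the hash THashMap uses for this key.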
+ operator size_t() const {
+ return MultiHash(Host, URL, Headers);
+ }
+
+ TString GetId() const {
+ return MD5::Calc(Host + ':' + URL + ':' + Headers);
+ }
+ };
+
+ struct TCacheRecord {
+ TInstant RefreshTime;
+ TInstant DeathTime;
+ TCachePolicy CachePolicy;
+ NHttp::THttpOutgoingRequestPtr Request;
+ NHttp::THttpOutgoingRequestPtr OutgoingRequest;
+ TDuration Timeout;
+ NHttp::THttpIncomingResponsePtr Response;
+ TString Error;
+ TVector<NHttp::TEvHttpProxy::TEvHttpOutgoingRequest::TPtr> Waiters;
+
+ TCacheRecord(const TCachePolicy& cachePolicy)
+ : CachePolicy(cachePolicy)
+ {}
+
+ bool IsValid() const {
+ return Response != nullptr || !Error.empty();
+ }
+
+ void UpdateResponse(NHttp::THttpIncomingResponsePtr response, const TString& error, TInstant now) {
+ if (error.empty() || Response == nullptr || !CachePolicy.KeepOnError) {
+ Response = response;
+ Error = error;
+ }
+ RefreshTime = now + CachePolicy.TimeToRefresh;
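+ // Add uniform jitter of up to PaceToRefresh so cached entries do not all
+ // refresh at the same instant.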
+ if (CachePolicy.PaceToRefresh) {
+ RefreshTime += TDuration::MilliSeconds(RandomNumber<ui64>() % CachePolicy.PaceToRefresh.MilliSeconds());
+ }
+ }
+
+ TString GetName() const {
+ return TStringBuilder() << (Request->Secure ? "https://" : "http://") << Request->Host << Request->URL;
+ }
+ };
+
+ struct TRefreshRecord {
+ TCacheKey Key;
+ TInstant RefreshTime;
+
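+ // Inverted comparison turns TPriorityQueue (a max-heap) into a min-heap on
+ // RefreshTime, so the earliest refresh sits on top.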
+ bool operator <(const TRefreshRecord& b) const {
+ return RefreshTime > b.RefreshTime;
+ }
+ };
+
+ THashMap<TCacheKey, TCacheRecord> Cache;
+ TPriorityQueue<TRefreshRecord> RefreshQueue;
+ THashMap<THttpOutgoingRequest*, TCacheKey> OutgoingRequests;
+
+ THttpOutgoingCacheActor(const NActors::TActorId& httpProxyId, TGetCachePolicy getCachePolicy)
+ : HttpProxyId(httpProxyId)
+ , GetCachePolicy(std::move(getCachePolicy))
+ {}
+
+ void Bootstrap(const NActors::TActorContext&) {
+ Become(&THttpOutgoingCacheActor::StateWork, RefreshTimeout, new NActors::TEvents::TEvWakeup());
+ }
+
+ static TString GetCacheHeadersKey(const NHttp::THttpOutgoingRequest* request, const TCachePolicy& policy) {
+ TStringBuilder key;
+ if (!policy.HeadersToCacheKey.empty()) {
+ NHttp::THeaders headers(request->Headers);
+ for (const TString& header : policy.HeadersToCacheKey) {
+ key << headers[header];
+ }
+ }
+ return key;
+ }
+
+ static TCacheKey GetCacheKey(const NHttp::THttpOutgoingRequest* request, const TCachePolicy& policy) {
+ return { ToString(request->Host), ToString(request->URL), GetCacheHeadersKey(request, policy) };
+ }
+
+ void Handle(NHttp::TEvHttpProxy::TEvHttpOutgoingResponse::TPtr event, const NActors::TActorContext& ctx) {
+ ctx.Send(event->Forward(HttpProxyId));
+ }
+
+ void Handle(NHttp::TEvHttpProxy::TEvHttpIncomingRequest::TPtr event, const NActors::TActorContext& ctx) {
+ ctx.Send(event->Forward(HttpProxyId));
+ }
+
+ void Handle(NHttp::TEvHttpProxy::TEvAddListeningPort::TPtr event, const NActors::TActorContext& ctx) {
+ ctx.Send(event->Forward(HttpProxyId));
+ }
+
+ void Handle(NHttp::TEvHttpProxy::TEvRegisterHandler::TPtr event, const NActors::TActorContext& ctx) {
+ ctx.Send(event->Forward(HttpProxyId));
+ }
+
+ void Handle(NHttp::TEvHttpProxy::TEvHttpIncomingResponse::TPtr event, const NActors::TActorContext& ctx) {
+ NHttp::THttpOutgoingRequestPtr request(event->Get()->Request);
+ NHttp::THttpIncomingResponsePtr response(event->Get()->Response);
+ auto itRequests = OutgoingRequests.find(request.Get());
+ if (itRequests == OutgoingRequests.end()) {
+ LOG_ERROR_S(ctx, HttpLog, "Cache received response to unknown request " << request->Host << request->URL);
+ return;
+ }
+ auto key = itRequests->second;
+ OutgoingRequests.erase(itRequests);
+ auto it = Cache.find(key);
+ if (it == Cache.end()) {
+ LOG_ERROR_S(ctx, HttpLog, "Cache received response to unknown cache key " << request->Host << request->URL);
+ return;
+ }
+ TCacheRecord& cacheRecord = it->second;
+ cacheRecord.OutgoingRequest.Reset();
+ for (auto& waiter : cacheRecord.Waiters) {
+ NHttp::THttpIncomingResponsePtr response2;
+ TString error2;
+ if (response != nullptr) {
+ response2 = response->Duplicate(waiter->Get()->Request);
+ }
+ if (!event->Get()->Error.empty()) {
+ error2 = event->Get()->Error;
+ }
+ ctx.Send(waiter->Sender, new NHttp::TEvHttpProxy::TEvHttpIncomingResponse(waiter->Get()->Request, response2, error2));
+ }
+ cacheRecord.Waiters.clear();
+ TString error;
+ if (event->Get()->Error.empty()) {
+ if (event->Get()->Response != nullptr && event->Get()->Response->Status != "200") {
+ error = event->Get()->Response->Message;
+ }
+ } else {
+ error = event->Get()->Error;
+ }
+ if (!error.empty()) {
+ LOG_WARN_S(ctx, HttpLog, "Error from " << cacheRecord.GetName() << ": " << error);
+ }
+ LOG_DEBUG_S(ctx, HttpLog, "OutgoingUpdate " << cacheRecord.GetName());
+ cacheRecord.UpdateResponse(response, event->Get()->Error, ctx.Now());
+ RefreshQueue.push({it->first, it->second.RefreshTime});
+ LOG_DEBUG_S(ctx, HttpLog, "OutgoingSchedule " << cacheRecord.GetName() << " at " << cacheRecord.RefreshTime << " until " << cacheRecord.DeathTime);
+ }
+
+ void Handle(NHttp::TEvHttpProxy::TEvHttpOutgoingRequest::TPtr event, const NActors::TActorContext& ctx) {
+ const NHttp::THttpOutgoingRequest* request = event->Get()->Request.Get();
+ auto policy = GetCachePolicy(request);
+ if (policy.TimeToExpire == TDuration()) {
+ ctx.Send(event->Forward(HttpProxyId));
+ return;
+ }
+ auto key = GetCacheKey(request, policy);
+ auto it = Cache.find(key);
+ if (it != Cache.end()) {
+ if (it->second.IsValid()) {
+ LOG_DEBUG_S(ctx, HttpLog, "OutgoingRespond "
+ << it->second.GetName()
+ << " ("
+ << ((it->second.Response != nullptr) ? ToString(it->second.Response->Size()) : TString("error"))
+ << ")");
+ NHttp::THttpIncomingResponsePtr response = it->second.Response;
+ if (response != nullptr) {
+ response = response->Duplicate(event->Get()->Request);
+ }
+ ctx.Send(event->Sender,
+ new NHttp::TEvHttpProxy::TEvHttpIncomingResponse(event->Get()->Request,
+ response,
+ it->second.Error));
+ it->second.DeathTime = ctx.Now() + it->second.CachePolicy.TimeToExpire; // prolong active cache items
+ return;
+ }
+ } else {
+ it = Cache.emplace(key, policy).first;
+ it->second.Request = event->Get()->Request;
+ it->second.Timeout = event->Get()->Timeout;
+ it->second.OutgoingRequest = it->second.Request->Duplicate();
+ OutgoingRequests[it->second.OutgoingRequest.Get()] = key;
+ LOG_DEBUG_S(ctx, HttpLog, "OutgoingInitiate " << it->second.GetName());
+ ctx.Send(HttpProxyId, new NHttp::TEvHttpProxy::TEvHttpOutgoingRequest(it->second.OutgoingRequest, it->second.Timeout));
+ }
+ it->second.DeathTime = ctx.Now() + it->second.CachePolicy.TimeToExpire;
+ it->second.Waiters.emplace_back(std::move(event));
+ }
+
+ void HandleRefresh(const NActors::TActorContext& ctx) {
+ while (!RefreshQueue.empty() && RefreshQueue.top().RefreshTime <= ctx.Now()) {
+ TRefreshRecord rrec = RefreshQueue.top();
+ RefreshQueue.pop();
+ auto it = Cache.find(rrec.Key);
+ if (it != Cache.end()) {
+ if (it->second.DeathTime > ctx.Now()) {
+ LOG_DEBUG_S(ctx, HttpLog, "OutgoingRefresh " << it->second.GetName());
+ it->second.OutgoingRequest = it->second.Request->Duplicate();
+ OutgoingRequests[it->second.OutgoingRequest.Get()] = it->first;
+ ctx.Send(HttpProxyId, new NHttp::TEvHttpProxy::TEvHttpOutgoingRequest(it->second.OutgoingRequest, it->second.Timeout));
+ } else {
+ LOG_DEBUG_S(ctx, HttpLog, "OutgoingForget " << it->second.GetName());
+ if (it->second.OutgoingRequest) {
+ OutgoingRequests.erase(it->second.OutgoingRequest.Get());
+ }
+ Cache.erase(it);
+ }
+ }
+ }
+ ctx.Schedule(RefreshTimeout, new NActors::TEvents::TEvWakeup());
+ }
+
+ STFUNC(StateWork) {
+ switch (ev->GetTypeRewrite()) {
+ HFunc(NHttp::TEvHttpProxy::TEvHttpIncomingResponse, Handle);
+ HFunc(NHttp::TEvHttpProxy::TEvHttpOutgoingRequest, Handle);
+ HFunc(NHttp::TEvHttpProxy::TEvAddListeningPort, Handle);
+ HFunc(NHttp::TEvHttpProxy::TEvRegisterHandler, Handle);
+ HFunc(NHttp::TEvHttpProxy::TEvHttpIncomingRequest, Handle);
+ HFunc(NHttp::TEvHttpProxy::TEvHttpOutgoingResponse, Handle);
+ CFunc(NActors::TEvents::TSystem::Wakeup, HandleRefresh);
+ }
+ }
+};
+
+const TDuration THttpOutgoingCacheActor::RefreshTimeout;
+
+class THttpIncomingCacheActor : public NActors::TActorBootstrapped<THttpIncomingCacheActor>, THttpConfig {
+public:
+ using TBase = NActors::TActorBootstrapped<THttpIncomingCacheActor>;
+ NActors::TActorId HttpProxyId;
+ TGetCachePolicy GetCachePolicy;
+ static constexpr TDuration RefreshTimeout = TDuration::Seconds(1);
+ THashMap<TString, TActorId> Handlers;
+
+ struct TCacheKey {
+ TString Host;
+ TString URL;
+ TString Headers;
+
+ operator size_t() const {
+ return MultiHash(Host, URL, Headers);
+ }
+
+ TString GetId() const {
+ return MD5::Calc(Host + ':' + URL + ':' + Headers);
+ }
+ };
+
+ struct TCacheRecord {
+ TInstant RefreshTime;
+ TInstant DeathTime;
+ TCachePolicy CachePolicy;
+ TString CacheId;
+ NHttp::THttpIncomingRequestPtr Request;
+ TDuration Timeout;
+ NHttp::THttpOutgoingResponsePtr Response;
+ TVector<NHttp::TEvHttpProxy::TEvHttpIncomingRequest::TPtr> Waiters;
+ ui32 Retries = 0;
+ bool Enqueued = false;
+
+ TCacheRecord(const TCachePolicy& cachePolicy)
+ : CachePolicy(cachePolicy)
+ {}
+
+ bool IsValid() const {
+ return Response != nullptr;
+ }
+
+ void InitRequest(NHttp::THttpIncomingRequestPtr request) {
+ Request = request;
+ if (CachePolicy.TimeToExpire) {
+ DeathTime = NActors::TlsActivationContext->Now() + CachePolicy.TimeToExpire;
+ }
+ }
+
+ void UpdateResponse(NHttp::THttpOutgoingResponsePtr response, const TString& error, TInstant now) {
+ if (error.empty() || !CachePolicy.KeepOnError) {
+ Response = response;
+ }
+ Retries = 0;
+ if (CachePolicy.TimeToRefresh) {
+ RefreshTime = now + CachePolicy.TimeToRefresh;
+ if (CachePolicy.PaceToRefresh) {
+ RefreshTime += TDuration::MilliSeconds(RandomNumber<ui64>() % CachePolicy.PaceToRefresh.MilliSeconds());
+ }
+ }
+ }
+
+ void UpdateExpireTime() {
+ if (CachePolicy.TimeToExpire) {
+ DeathTime = NActors::TlsActivationContext->Now() + CachePolicy.TimeToExpire;
+ }
+ }
+
+ TString GetName() const {
+ return TStringBuilder() << (Request->Secure ? "https://" : "http://") << Request->Host << Request->URL
+ << " (" << CacheId << ")";
+ }
+ };
+
+ struct TRefreshRecord {
+ TCacheKey Key;
+ TInstant RefreshTime;
+
+ bool operator <(const TRefreshRecord& b) const {
+ return RefreshTime > b.RefreshTime;
+ }
+ };
+
+ THashMap<TCacheKey, TCacheRecord> Cache;
+ TPriorityQueue<TRefreshRecord> RefreshQueue;
+ THashMap<THttpIncomingRequest*, TCacheKey> IncomingRequests;
+
+ THttpIncomingCacheActor(const NActors::TActorId& httpProxyId, TGetCachePolicy getCachePolicy)
+ : HttpProxyId(httpProxyId)
+ , GetCachePolicy(std::move(getCachePolicy))
+ {}
+
+ void Bootstrap(const NActors::TActorContext&) {
+ Become(&THttpIncomingCacheActor::StateWork, RefreshTimeout, new NActors::TEvents::TEvWakeup());
+ }
+
+ static TString GetCacheHeadersKey(const NHttp::THttpIncomingRequest* request, const TCachePolicy& policy) {
+ TStringBuilder key;
+ if (!policy.HeadersToCacheKey.empty()) {
+ NHttp::THeaders headers(request->Headers);
+ for (const TString& header : policy.HeadersToCacheKey) {
+ key << headers[header];
+ }
+ }
+ return key;
+ }
+
+ static TCacheKey GetCacheKey(const NHttp::THttpIncomingRequest* request, const TCachePolicy& policy) {
+ return { ToString(request->Host), ToString(request->URL), GetCacheHeadersKey(request, policy) };
+ }
+
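+ // Longest-prefix match: strip trailing path segments (e.g. /a/b/c -> /a/ -> /)
+ // until a registered handler is found.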
+ TActorId GetRequestHandler(NHttp::THttpIncomingRequestPtr request) {
+ TStringBuf url = request->URL.Before('?');
+ THashMap<TString, TActorId>::iterator it;
+ while (!url.empty()) {
+ it = Handlers.find(url);
+ if (it != Handlers.end()) {
+ return it->second;
+ } else {
+ if (url.EndsWith('/')) {
+ url.Trunc(url.size() - 1);
+ }
+ size_t pos = url.rfind('/');
+ if (pos == TStringBuf::npos) {
+ break;
+ } else {
+ url = url.substr(0, pos + 1);
+ }
+ }
+ }
+ return {};
+ }
+
+ void SendCacheRequest(const TCacheKey& cacheKey, TCacheRecord& cacheRecord, const NActors::TActorContext& ctx) {
+ cacheRecord.Request = cacheRecord.Request->Duplicate();
+ IncomingRequests[cacheRecord.Request.Get()] = cacheKey;
+ TActorId handler = GetRequestHandler(cacheRecord.Request);
+ if (handler) {
+ Send(handler, new NHttp::TEvHttpProxy::TEvHttpIncomingRequest(cacheRecord.Request));
+ } else {
+ LOG_ERROR_S(ctx, HttpLog, "Can't find cache handler for " << cacheRecord.GetName());
+ }
+ }
+
+ void DropCacheRecord(THashMap<TCacheKey, TCacheRecord>::iterator it) {
+ if (it->second.Request) {
+ IncomingRequests.erase(it->second.Request.Get());
+ }
+ for (auto& waiter : it->second.Waiters) {
+ NHttp::THttpOutgoingResponsePtr response;
+ response = waiter->Get()->Request->CreateResponseGatewayTimeout("Timeout", "text/plain");
+ Send(waiter->Sender, new NHttp::TEvHttpProxy::TEvHttpOutgoingResponse(response));
+ }
+ Cache.erase(it);
+ }
+
+ void Handle(NHttp::TEvHttpProxy::TEvHttpIncomingResponse::TPtr event, const NActors::TActorContext& ctx) {
+ ctx.Send(event->Forward(HttpProxyId));
+ }
+
+ void Handle(NHttp::TEvHttpProxy::TEvHttpOutgoingRequest::TPtr event, const NActors::TActorContext& ctx) {
+ ctx.Send(event->Forward(HttpProxyId));
+ }
+
+ void Handle(NHttp::TEvHttpProxy::TEvAddListeningPort::TPtr event, const NActors::TActorContext& ctx) {
+ ctx.Send(event->Forward(HttpProxyId));
+ }
+
+ void Handle(NHttp::TEvHttpProxy::TEvRegisterHandler::TPtr event, const NActors::TActorContext& ctx) {
+ Handlers[event->Get()->Path] = event->Get()->Handler;
+ ctx.Send(HttpProxyId, new NHttp::TEvHttpProxy::TEvRegisterHandler(event->Get()->Path, ctx.SelfID));
+ }
+
+ void Handle(NHttp::TEvHttpProxy::TEvHttpOutgoingResponse::TPtr event, const NActors::TActorContext& ctx) {
+ NHttp::THttpIncomingRequestPtr request(event->Get()->Response->GetRequest());
+ NHttp::THttpOutgoingResponsePtr response(event->Get()->Response);
+ auto itRequests = IncomingRequests.find(request.Get());
+ if (itRequests == IncomingRequests.end()) {
+ LOG_ERROR_S(ctx, HttpLog, "Cache received response to unknown request " << request->Host << request->URL);
+ return;
+ }
+
+ TCacheKey key = itRequests->second;
+ auto it = Cache.find(key);
+ if (it == Cache.end()) {
+ LOG_ERROR_S(ctx, HttpLog, "Cache received response to unknown cache key " << request->Host << request->URL);
+ return;
+ }
+
+ IncomingRequests.erase(itRequests);
+ TCacheRecord& cacheRecord = it->second;
+ TStringBuf status;
+ TString error;
+
+ if (event->Get()->Response != nullptr) {
+ status = event->Get()->Response->Status;
+ if (!status.StartsWith("2")) {
+ error = event->Get()->Response->Message;
+ }
+ }
+ if (cacheRecord.CachePolicy.RetriesCount > 0) {
+ auto itStatusToRetry = std::find(cacheRecord.CachePolicy.StatusesToRetry.begin(), cacheRecord.CachePolicy.StatusesToRetry.end(), status);
+ if (itStatusToRetry != cacheRecord.CachePolicy.StatusesToRetry.end()) {
+ if (cacheRecord.Retries < cacheRecord.CachePolicy.RetriesCount) {
+ ++cacheRecord.Retries;
+ LOG_WARN_S(ctx, HttpLog, "IncomingRetry " << cacheRecord.GetName() << ": " << status << " " << error);
+ SendCacheRequest(key, cacheRecord, ctx);
+ return;
+ }
+ }
+ }
+ for (auto& waiter : cacheRecord.Waiters) {
+ NHttp::THttpOutgoingResponsePtr response2;
+ response2 = response->Duplicate(waiter->Get()->Request);
+ ctx.Send(waiter->Sender, new NHttp::TEvHttpProxy::TEvHttpOutgoingResponse(response2));
+ }
+ cacheRecord.Waiters.clear();
+ if (!error.empty()) {
+ LOG_WARN_S(ctx, HttpLog, "Error from " << cacheRecord.GetName() << ": " << error);
+ if (!cacheRecord.Response) {
+ LOG_DEBUG_S(ctx, HttpLog, "IncomingDiscard " << cacheRecord.GetName());
+ DropCacheRecord(it);
+ return;
+ }
+ }
+ if (cacheRecord.CachePolicy.TimeToRefresh) {
+ LOG_DEBUG_S(ctx, HttpLog, "IncomingUpdate " << cacheRecord.GetName());
+ cacheRecord.UpdateResponse(response, error, ctx.Now());
+ if (!cacheRecord.Enqueued) {
+ RefreshQueue.push({it->first, it->second.RefreshTime});
+ cacheRecord.Enqueued = true;
+ }
+ LOG_DEBUG_S(ctx, HttpLog, "IncomingSchedule " << cacheRecord.GetName() << " at " << cacheRecord.RefreshTime << " until " << cacheRecord.DeathTime);
+ } else {
+ LOG_DEBUG_S(ctx, HttpLog, "IncomingDrop " << cacheRecord.GetName());
+ DropCacheRecord(it);
+ }
+ }
+
+ void Handle(NHttp::TEvHttpProxy::TEvHttpIncomingRequest::TPtr event, const NActors::TActorContext& ctx) {
+ const NHttp::THttpIncomingRequest* request = event->Get()->Request.Get();
+ TCachePolicy policy = GetCachePolicy(request);
+ if (policy.TimeToExpire == TDuration() && policy.RetriesCount == 0) {
+ TActorId handler = GetRequestHandler(event->Get()->Request);
+ if (handler) {
+ ctx.Send(event->Forward(handler));
+ }
+ return;
+ }
+ auto key = GetCacheKey(request, policy);
+ auto it = Cache.find(key);
+ if (it != Cache.end() && !policy.DiscardCache) {
+ it->second.UpdateExpireTime();
+ if (it->second.IsValid()) {
+ LOG_DEBUG_S(ctx, HttpLog, "IncomingRespond "
+ << it->second.GetName()
+ << " ("
+ << ((it->second.Response != nullptr) ? ToString(it->second.Response->Size()) : TString("error"))
+ << ")");
+ NHttp::THttpOutgoingResponsePtr response = it->second.Response;
+ if (response != nullptr) {
+ response = response->Duplicate(event->Get()->Request);
+ }
+ ctx.Send(event->Sender, new NHttp::TEvHttpProxy::TEvHttpOutgoingResponse(response));
+ return;
+ }
+ } else {
+ it = Cache.emplace(key, policy).first;
+ it->second.CacheId = key.GetId(); // for debugging
+ it->second.InitRequest(event->Get()->Request);
+ if (policy.DiscardCache) {
+ LOG_DEBUG_S(ctx, HttpLog, "IncomingDiscardCache " << it->second.GetName());
+ }
+ LOG_DEBUG_S(ctx, HttpLog, "IncomingInitiate " << it->second.GetName());
+ SendCacheRequest(key, it->second, ctx);
+ }
+ it->second.Waiters.emplace_back(std::move(event));
+ }
+
+ void HandleRefresh(const NActors::TActorContext& ctx) {
+ while (!RefreshQueue.empty() && RefreshQueue.top().RefreshTime <= ctx.Now()) {
+ TRefreshRecord rrec = RefreshQueue.top();
+ RefreshQueue.pop();
+ auto it = Cache.find(rrec.Key);
+ if (it != Cache.end()) {
+ it->second.Enqueued = false;
+ if (it->second.DeathTime > ctx.Now()) {
+ LOG_DEBUG_S(ctx, HttpLog, "IncomingRefresh " << it->second.GetName());
+ SendCacheRequest(it->first, it->second, ctx);
+ } else {
+ LOG_DEBUG_S(ctx, HttpLog, "IncomingForget " << it->second.GetName());
+ DropCacheRecord(it);
+ }
+ }
+ }
+ ctx.Schedule(RefreshTimeout, new NActors::TEvents::TEvWakeup());
+ }
+
+ STFUNC(StateWork) {
+ switch (ev->GetTypeRewrite()) {
+ HFunc(NHttp::TEvHttpProxy::TEvHttpIncomingResponse, Handle);
+ HFunc(NHttp::TEvHttpProxy::TEvHttpOutgoingRequest, Handle);
+ HFunc(NHttp::TEvHttpProxy::TEvAddListeningPort, Handle);
+ HFunc(NHttp::TEvHttpProxy::TEvRegisterHandler, Handle);
+ HFunc(NHttp::TEvHttpProxy::TEvHttpIncomingRequest, Handle);
+ HFunc(NHttp::TEvHttpProxy::TEvHttpOutgoingResponse, Handle);
+ CFunc(NActors::TEvents::TSystem::Wakeup, HandleRefresh);
+ }
+ }
+};
+
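+// Builds a policy from the request's Cache-Control header: no-store/no-cache
+// bypass the cache, max-age/min-fresh set both the refresh and expiry
+// intervals, and stale-if-error keeps the last good response on upstream errors.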
+TCachePolicy GetDefaultCachePolicy(const THttpRequest* request, const TCachePolicy& defaultPolicy) {
+ TCachePolicy policy = defaultPolicy;
+ THeaders headers(request->Headers);
+ TStringBuf cacheControl(headers["Cache-Control"]);
+ while (TStringBuf cacheItem = cacheControl.NextTok(',')) {
+ Trim(cacheItem, ' ');
+ if (cacheItem == "no-store" || cacheItem == "no-cache") {
+ policy.DiscardCache = true;
+ }
+ TStringBuf itemName = cacheItem.NextTok('=');
+ TrimEnd(itemName, ' ');
+ TrimBegin(cacheItem, ' ');
+ if (itemName == "max-age") {
+ policy.TimeToRefresh = policy.TimeToExpire = TDuration::Seconds(FromString(cacheItem));
+ }
+ if (itemName == "min-fresh") {
+ policy.TimeToRefresh = policy.TimeToExpire = TDuration::Seconds(FromString(cacheItem));
+ }
+ if (itemName == "stale-if-error") {
+ policy.KeepOnError = true;
+ }
+ }
+ return policy;
+}
+
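+// CreateHttpCache appears to be a legacy alias: it constructs the same
+// outgoing cache actor as CreateOutgoingHttpCache.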
+NActors::IActor* CreateHttpCache(const NActors::TActorId& httpProxyId, TGetCachePolicy cachePolicy) {
+ return new THttpOutgoingCacheActor(httpProxyId, std::move(cachePolicy));
+}
+
+NActors::IActor* CreateOutgoingHttpCache(const NActors::TActorId& httpProxyId, TGetCachePolicy cachePolicy) {
+ return new THttpOutgoingCacheActor(httpProxyId, std::move(cachePolicy));
+}
+
+NActors::IActor* CreateIncomingHttpCache(const NActors::TActorId& httpProxyId, TGetCachePolicy cachePolicy) {
+ return new THttpIncomingCacheActor(httpProxyId, std::move(cachePolicy));
+}
+
+}
diff --git a/library/cpp/actors/http/http_cache.h b/library/cpp/actors/http/http_cache.h
new file mode 100644
index 0000000000..ac38bdcac8
--- /dev/null
+++ b/library/cpp/actors/http/http_cache.h
@@ -0,0 +1,27 @@
+#pragma once
+#include <library/cpp/actors/core/actor.h>
+#include "http.h"
+
+namespace NHttp {
+
+struct TCachePolicy {
+ TDuration TimeToExpire;
+ TDuration TimeToRefresh;
+ TDuration PaceToRefresh;
+ bool KeepOnError = false;
+ bool DiscardCache = false;
+ TArrayRef<TString> HeadersToCacheKey;
+ TArrayRef<TString> StatusesToRetry;
+ ui32 RetriesCount = 0;
+
+ TCachePolicy() = default;
+};
+
+using TGetCachePolicy = std::function<TCachePolicy(const THttpRequest*)>;
+
+NActors::IActor* CreateHttpCache(const NActors::TActorId& httpProxyId, TGetCachePolicy cachePolicy);
+NActors::IActor* CreateOutgoingHttpCache(const NActors::TActorId& httpProxyId, TGetCachePolicy cachePolicy);
+NActors::IActor* CreateIncomingHttpCache(const NActors::TActorId& httpProxyId, TGetCachePolicy cachePolicy);
+TCachePolicy GetDefaultCachePolicy(const THttpRequest* request, const TCachePolicy& policy = TCachePolicy());
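+
+// Usage sketch (hypothetical wiring; assumes a running actor system and an
+// already registered http proxy actor):
+//
+//   auto policy = [](const THttpRequest* request) {
+//       return NHttp::GetDefaultCachePolicy(request);
+//   };
+//   NActors::TActorId cacheId = actorSystem.Register(
+//       NHttp::CreateOutgoingHttpCache(httpProxyId, policy));
+//
+// Requests sent to cacheId are answered from cache while fresh and forwarded
+// to httpProxyId otherwise.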
+
+}
diff --git a/library/cpp/actors/http/http_config.h b/library/cpp/actors/http/http_config.h
new file mode 100644
index 0000000000..faeff79449
--- /dev/null
+++ b/library/cpp/actors/http/http_config.h
@@ -0,0 +1,19 @@
+#pragma once
+#include <util/network/sock.h>
+#include <library/cpp/actors/core/log.h>
+#include <library/cpp/actors/protos/services_common.pb.h>
+
+namespace NHttp {
+
+struct THttpConfig {
+ static constexpr NActors::NLog::EComponent HttpLog = NActorsServices::EServiceCommon::HTTP;
+ static constexpr size_t BUFFER_SIZE = 64 * 1024;
+ static constexpr size_t BUFFER_MIN_STEP = 10 * 1024;
+ static constexpr int LISTEN_QUEUE = 10;
+ static constexpr TDuration SOCKET_TIMEOUT = TDuration::MilliSeconds(60000);
+ static constexpr TDuration CONNECTION_TIMEOUT = TDuration::MilliSeconds(60000);
+ using SocketType = TInet6StreamSocket;
+ using SocketAddressType = TSockAddrInet6;
+};
+
+}
diff --git a/library/cpp/actors/http/http_proxy.cpp b/library/cpp/actors/http/http_proxy.cpp
new file mode 100644
index 0000000000..36c6855d93
--- /dev/null
+++ b/library/cpp/actors/http/http_proxy.cpp
@@ -0,0 +1,314 @@
+#include <library/cpp/actors/core/events.h>
+#include <library/cpp/monlib/metrics/metric_registry.h>
+#include "http_proxy.h"
+
+namespace NHttp {
+
+class THttpProxy : public NActors::TActorBootstrapped<THttpProxy>, public THttpConfig {
+public:
+ IActor* AddListeningPort(TEvHttpProxy::TEvAddListeningPort::TPtr event, const NActors::TActorContext& ctx) {
+ IActor* listeningSocket = CreateHttpAcceptorActor(ctx.SelfID, Poller);
+ TActorId acceptorId = ctx.Register(listeningSocket);
+ ctx.Send(event->Forward(acceptorId));
+ Acceptors.emplace_back(acceptorId);
+ return listeningSocket;
+ }
+
+ IActor* AddOutgoingConnection(const TString& address, bool secure, const NActors::TActorContext& ctx) {
+ IActor* connectionSocket = CreateOutgoingConnectionActor(ctx.SelfID, address, secure, Poller);
+ TActorId connectionId = ctx.Register(connectionSocket);
+ Connections.emplace(connectionId);
+ return connectionSocket;
+ }
+
+ void Bootstrap(const NActors::TActorContext& ctx) {
+ Poller = ctx.Register(NActors::CreatePollerActor());
+ Become(&THttpProxy::StateWork);
+ }
+
+ THttpProxy(NMonitoring::TMetricRegistry& sensors)
+ : Sensors(sensors)
+ {}
+
+protected:
+ STFUNC(StateWork) {
+ switch (ev->GetTypeRewrite()) {
+ HFunc(TEvHttpProxy::TEvAddListeningPort, Handle);
+ HFunc(TEvHttpProxy::TEvRegisterHandler, Handle);
+ HFunc(TEvHttpProxy::TEvHttpIncomingRequest, Handle);
+ HFunc(TEvHttpProxy::TEvHttpOutgoingRequest, Handle);
+ HFunc(TEvHttpProxy::TEvHttpIncomingResponse, Handle);
+ HFunc(TEvHttpProxy::TEvHttpOutgoingResponse, Handle);
+ HFunc(TEvHttpProxy::TEvHttpAcceptorClosed, Handle);
+ HFunc(TEvHttpProxy::TEvHttpConnectionClosed, Handle);
+ HFunc(TEvHttpProxy::TEvResolveHostRequest, Handle);
+ HFunc(TEvHttpProxy::TEvReportSensors, Handle);
+ HFunc(NActors::TEvents::TEvPoison, Handle);
+ }
+ }
+
+ void PassAway() override {
+ Send(Poller, new NActors::TEvents::TEvPoisonPill());
+ for (const NActors::TActorId& connection : Connections) {
+ Send(connection, new NActors::TEvents::TEvPoisonPill());
+ }
+ for (const NActors::TActorId& acceptor : Acceptors) {
+ Send(acceptor, new NActors::TEvents::TEvPoisonPill());
+ }
+ NActors::TActorBootstrapped<THttpProxy>::PassAway();
+ }
+
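+ // Longest-prefix routing: peel trailing path segments until a registered
+ // handler matches; otherwise answer 404 below.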
+ void Handle(TEvHttpProxy::TEvHttpIncomingRequest::TPtr event, const NActors::TActorContext& ctx) {
+ TStringBuf url = event->Get()->Request->URL.Before('?');
+ THashMap<TString, TActorId>::iterator it;
+ while (!url.empty()) {
+ it = Handlers.find(url);
+ if (it != Handlers.end()) {
+ ctx.Send(event->Forward(it->second));
+ return;
+ } else {
+ if (url.EndsWith('/')) {
+ url.Trunc(url.size() - 1);
+ }
+ size_t pos = url.rfind('/');
+ if (pos == TStringBuf::npos) {
+ break;
+ } else {
+ url = url.substr(0, pos + 1);
+ }
+ }
+ }
+ ctx.Send(event->Sender, new TEvHttpProxy::TEvHttpOutgoingResponse(event->Get()->Request->CreateResponseNotFound()));
+ }
+
+ void Handle(TEvHttpProxy::TEvHttpIncomingResponse::TPtr event, const NActors::TActorContext& ctx) {
+ Y_UNUSED(event);
+ Y_UNUSED(ctx);
+ Y_FAIL("This event shouldn't be there, it should go to the http connection owner directly");
+ }
+
+ void Handle(TEvHttpProxy::TEvHttpOutgoingResponse::TPtr event, const NActors::TActorContext& ctx) {
+ Y_UNUSED(event);
+ Y_UNUSED(ctx);
+ Y_FAIL("This event shouldn't be there, it should go to the http connection directly");
+ }
+
+ void Handle(TEvHttpProxy::TEvHttpOutgoingRequest::TPtr event, const NActors::TActorContext& ctx) {
+ TStringBuf host(event->Get()->Request->Host);
+ bool secure(event->Get()->Request->Secure);
+ NActors::IActor* actor = AddOutgoingConnection(TString(host), secure, ctx);
+ ctx.Send(event->Forward(actor->SelfId()));
+ }
+
+ void Handle(TEvHttpProxy::TEvAddListeningPort::TPtr event, const NActors::TActorContext& ctx) {
+ AddListeningPort(event, ctx);
+ }
+
+ void Handle(TEvHttpProxy::TEvHttpAcceptorClosed::TPtr event, const NActors::TActorContext&) {
+ for (auto it = Acceptors.begin(); it != Acceptors.end(); ++it) {
+ if (*it == event->Get()->ConnectionID) {
+ Acceptors.erase(it);
+ break;
+ }
+ }
+ }
+
+ void Handle(TEvHttpProxy::TEvHttpConnectionClosed::TPtr event, const NActors::TActorContext&) {
+ Connections.erase(event->Get()->ConnectionID);
+ }
+
+ void Handle(TEvHttpProxy::TEvRegisterHandler::TPtr event, const NActors::TActorContext&) {
+ Handlers[event->Get()->Path] = event->Get()->Handler;
+ }
+
+ void Handle(TEvHttpProxy::TEvResolveHostRequest::TPtr event, const NActors::TActorContext& ctx) {
+ const TString& host(event->Get()->Host);
+ auto it = Hosts.find(host);
+ if (it == Hosts.end() || it->second.DeadlineTime < ctx.Now()) { // (re-)resolve when the entry is missing or expired
+ TString addressPart;
+ TIpPort portPart = 0;
+ CrackAddress(host, addressPart, portPart);
+ if (IsIPv6(addressPart)) {
+ TSockAddrInet6 address(addressPart.c_str(), portPart);
+ if (it == Hosts.end()) {
+ it = Hosts.emplace(host, THostEntry()).first;
+ }
+ it->second.Address = address;
+ it->second.DeadlineTime = ctx.Now() + HostsTimeToLive;
+ } else {
+ // TODO(xenoxeno): move to another, possibly blocking actor
+ try {
+ const NDns::TResolvedHost* result = NDns::CachedResolve(NDns::TResolveInfo(addressPart, portPart));
+ if (result != nullptr) {
+ auto pAddr = result->Addr.Begin();
+ while (pAddr != result->Addr.End() && pAddr->ai_family != AF_INET6) {
+ ++pAddr;
+ }
+ if (pAddr == result->Addr.End()) {
+ ctx.Send(event->Sender, new TEvHttpProxy::TEvResolveHostResponse("Invalid address family resolved"));
+ return;
+ }
+ TSockAddrInet6 address = {};
+ static_cast<sockaddr_in6&>(address) = *reinterpret_cast<sockaddr_in6*>(pAddr->ai_addr);
+ LOG_DEBUG_S(ctx, HttpLog, "Host " << host << " resolved to " << address.ToString());
+ if (it == Hosts.end()) {
+ it = Hosts.emplace(host, THostEntry()).first;
+ }
+ it->second.Address = address;
+ it->second.DeadlineTime = ctx.Now() + HostsTimeToLive;
+ } else {
+ ctx.Send(event->Sender, new TEvHttpProxy::TEvResolveHostResponse("Error resolving host"));
+ return;
+ }
+ }
+ catch (const yexception& e) {
+ ctx.Send(event->Sender, new TEvHttpProxy::TEvResolveHostResponse(e.what()));
+ return;
+ }
+ }
+ }
+ ctx.Send(event->Sender, new TEvHttpProxy::TEvResolveHostResponse(it->first, it->second.Address));
+ }
+
+ void Handle(TEvHttpProxy::TEvReportSensors::TPtr event, const NActors::TActorContext&) {
+ const TEvHttpProxy::TEvReportSensors& sensors(*event->Get());
+ const static TString urlNotFound = "not-found";
+ const TString& url = (sensors.Status == "404" ? urlNotFound : sensors.Url);
+
+ Sensors.Rate({
+ {"sensor", "count"},
+ {"direction", sensors.Direction},
+ {"peer", sensors.Host},
+ {"url", url},
+ {"status", sensors.Status}
+ })->Inc();
+ Sensors.HistogramRate({
+ {"sensor", "time_us"},
+ {"direction", sensors.Direction},
+ {"peer", sensors.Host},
+ {"url", url},
+ {"status", sensors.Status}
+ },
+ NMonitoring::ExplicitHistogram({1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 30000, 60000}))->Record(sensors.Time.MicroSeconds());
+ Sensors.HistogramRate({
+ {"sensor", "time_ms"},
+ {"direction", sensors.Direction},
+ {"peer", sensors.Host},
+ {"url", url},
+ {"status", sensors.Status}
+ },
+ NMonitoring::ExplicitHistogram({1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 30000, 60000}))->Record(sensors.Time.MilliSeconds());
+ }
+
+ void Handle(NActors::TEvents::TEvPoison::TPtr, const NActors::TActorContext&) {
+ PassAway();
+ }
+
+ NActors::TActorId Poller;
+ TVector<TActorId> Acceptors;
+
+ struct THostEntry {
+ TSockAddrInet6 Address;
+ TInstant DeadlineTime;
+ };
+
+ static constexpr TDuration HostsTimeToLive = TDuration::Seconds(60);
+
+ THashMap<TString, THostEntry> Hosts;
+ THashMap<TString, TActorId> Handlers;
+ THashSet<TActorId> Connections; // outgoing
+ NMonitoring::TMetricRegistry& Sensors;
+};
+
+TEvHttpProxy::TEvReportSensors* BuildOutgoingRequestSensors(const THttpOutgoingRequestPtr& request, const THttpIncomingResponsePtr& response) {
+ return new TEvHttpProxy::TEvReportSensors(
+ "out",
+ request->Host,
+ request->URL.Before('?'),
+ response ? response->Status : "504",
+ TDuration::Seconds(std::abs(request->Timer.Passed()))
+ );
+}
+
+TEvHttpProxy::TEvReportSensors* BuildIncomingRequestSensors(const THttpIncomingRequestPtr& request, const THttpOutgoingResponsePtr& response) {
+ return new TEvHttpProxy::TEvReportSensors(
+ "in",
+ request->Host,
+ request->URL.Before('?'),
+ response->Status,
+ TDuration::Seconds(std::abs(request->Timer.Passed()))
+ );
+}
+
+NActors::IActor* CreateHttpProxy(NMonitoring::TMetricRegistry& sensors) {
+ return new THttpProxy(sensors);
+}
+
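+// Heuristic only: any string made of hex digits and colons is treated as an
+// IPv6 literal, so a plain hex hostname such as "beef" also matches.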
+bool IsIPv6(const TString& host) {
+ return host.find_first_not_of(":0123456789abcdef") == TString::npos;
+}
+
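+// Splits a URL into optional scheme, host and uri; it always returns true, so
+// the return value is effectively reserved for future validation.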
+bool CrackURL(TStringBuf url, TStringBuf& scheme, TStringBuf& host, TStringBuf& uri) {
+ url.TrySplit("://", scheme, url);
+ auto pos = url.find('/');
+ if (pos == TStringBuf::npos) {
+ host = url;
+ } else {
+ host = url.substr(0, pos);
+ uri = url.substr(pos);
+ }
+ return true;
+}
+
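+// Splits "host:port", "[ipv6]:port" or a bare IPv6 literal into hostname and
+// port; when no port part is present the output port is left unchanged.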
+void CrackAddress(const TString& address, TString& hostname, TIpPort& port) {
+ size_t first_colon_pos = address.find(':');
+ if (first_colon_pos != TString::npos) {
+ size_t last_colon_pos = address.rfind(':');
+ if (last_colon_pos == first_colon_pos) {
+ // only one colon, simple case
+ port = FromStringWithDefault<TIpPort>(address.substr(first_colon_pos + 1), 0);
+ hostname = address.substr(0, first_colon_pos);
+ } else {
+ // ipv6?
+ size_t closing_bracket_pos = address.rfind(']');
+ if (closing_bracket_pos == TString::npos || closing_bracket_pos > last_colon_pos) {
+ // whole address is ipv6 host
+ hostname = address;
+ } else {
+ port = FromStringWithDefault<TIpPort>(address.substr(last_colon_pos + 1), 0);
+ hostname = address.substr(0, last_colon_pos);
+ }
+ if (hostname.StartsWith('[') && hostname.EndsWith(']')) {
+ hostname = hostname.substr(1, hostname.size() - 2);
+ }
+ }
+ } else {
+ hostname = address;
+ }
+}
+
+
+void TrimBegin(TStringBuf& target, char delim) {
+ while (!target.empty() && *target.begin() == delim) {
+ target.Skip(1);
+ }
+}
+
+void TrimEnd(TStringBuf& target, char delim) {
+ while (!target.empty() && target.back() == delim) {
+ target.Trunc(target.size() - 1);
+ }
+}
+
+void Trim(TStringBuf& target, char delim) {
+ TrimBegin(target, delim);
+ TrimEnd(target, delim);
+}
+
+void TrimEnd(TString& target, char delim) {
+ while (!target.empty() && target.back() == delim) {
+ target.resize(target.size() - 1);
+ }
+}
+
+}
diff --git a/library/cpp/actors/http/http_proxy.h b/library/cpp/actors/http/http_proxy.h
new file mode 100644
index 0000000000..afd0170997
--- /dev/null
+++ b/library/cpp/actors/http/http_proxy.h
@@ -0,0 +1,239 @@
+#pragma once
+#include <library/cpp/actors/core/actorsystem.h>
+#include <library/cpp/actors/core/actor.h>
+#include <library/cpp/actors/core/hfunc.h>
+#include <library/cpp/actors/core/events.h>
+#include <library/cpp/actors/core/event_local.h>
+#include <library/cpp/actors/core/actor_bootstrapped.h>
+#include <library/cpp/actors/core/log.h>
+#include <library/cpp/actors/interconnect/poller_actor.h>
+#include <library/cpp/dns/cache.h>
+#include <library/cpp/monlib/metrics/metric_registry.h>
+#include <util/generic/variant.h>
+#include "http.h"
+#include "http_proxy_ssl.h"
+
+namespace NHttp {
+
+struct TSocketDescriptor : NActors::TSharedDescriptor, THttpConfig {
+ SocketType Socket;
+
+ int GetDescriptor() override {
+ return static_cast<SOCKET>(Socket);
+ }
+};
+
+struct TEvHttpProxy {
+ enum EEv {
+ EvAddListeningPort = EventSpaceBegin(NActors::TEvents::ES_HTTP),
+ EvConfirmListen,
+ EvRegisterHandler,
+ EvHttpIncomingRequest,
+ EvHttpOutgoingRequest,
+ EvHttpIncomingResponse,
+ EvHttpOutgoingResponse,
+ EvHttpConnectionOpened,
+ EvHttpConnectionClosed,
+ EvHttpAcceptorClosed,
+ EvResolveHostRequest,
+ EvResolveHostResponse,
+ EvReportSensors,
+ EvEnd
+ };
+
+ static_assert(EvEnd < EventSpaceEnd(NActors::TEvents::ES_HTTP), "ES_HTTP event space is too small.");
+
+ struct TEvAddListeningPort : NActors::TEventLocal<TEvAddListeningPort, EvAddListeningPort> {
+ TIpPort Port;
+ TString WorkerName;
+ bool Secure = false;
+ TString CertificateFile;
+ TString PrivateKeyFile;
+ TString SslCertificatePem;
+
+ TEvAddListeningPort(TIpPort port)
+ : Port(port)
+ {}
+
+ TEvAddListeningPort(TIpPort port, const TString& workerName)
+ : Port(port)
+ , WorkerName(workerName)
+ {}
+ };
+
+ struct TEvConfirmListen : NActors::TEventLocal<TEvConfirmListen, EvConfirmListen> {
+ THttpConfig::SocketAddressType Address;
+
+ TEvConfirmListen(const THttpConfig::SocketAddressType& address)
+ : Address(address)
+ {}
+ };
+
+ struct TEvRegisterHandler : NActors::TEventLocal<TEvRegisterHandler, EvRegisterHandler> {
+ TString Path;
+ TActorId Handler;
+
+ TEvRegisterHandler(const TString& path, const TActorId& handler)
+ : Path(path)
+ , Handler(handler)
+ {}
+ };
+
+ struct TEvHttpIncomingRequest : NActors::TEventLocal<TEvHttpIncomingRequest, EvHttpIncomingRequest> {
+ THttpIncomingRequestPtr Request;
+
+ TEvHttpIncomingRequest(THttpIncomingRequestPtr request)
+ : Request(std::move(request))
+ {}
+ };
+
+ struct TEvHttpOutgoingRequest : NActors::TEventLocal<TEvHttpOutgoingRequest, EvHttpOutgoingRequest> {
+ THttpOutgoingRequestPtr Request;
+ TDuration Timeout;
+
+ TEvHttpOutgoingRequest(THttpOutgoingRequestPtr request)
+ : Request(std::move(request))
+ {}
+
+ TEvHttpOutgoingRequest(THttpOutgoingRequestPtr request, TDuration timeout)
+ : Request(std::move(request))
+ , Timeout(timeout)
+ {}
+ };
+
+ struct TEvHttpIncomingResponse : NActors::TEventLocal<TEvHttpIncomingResponse, EvHttpIncomingResponse> {
+ THttpOutgoingRequestPtr Request;
+ THttpIncomingResponsePtr Response;
+ TString Error;
+
+ TEvHttpIncomingResponse(THttpOutgoingRequestPtr request, THttpIncomingResponsePtr response, const TString& error)
+ : Request(std::move(request))
+ , Response(std::move(response))
+ , Error(error)
+ {}
+
+ TEvHttpIncomingResponse(THttpOutgoingRequestPtr request, THttpIncomingResponsePtr response)
+ : Request(std::move(request))
+ , Response(std::move(response))
+ {}
+
+ TString GetError() const {
+ TStringBuilder error;
+ if (Response != nullptr && !Response->Status.StartsWith('2')) {
+ error << Response->Status << ' ' << Response->Message;
+ }
+ if (!Error.empty()) {
+ if (!error.empty()) {
+ error << ';';
+ }
+ error << Error;
+ }
+ return error;
+ }
+ };
+
+ struct TEvHttpOutgoingResponse : NActors::TEventLocal<TEvHttpOutgoingResponse, EvHttpOutgoingResponse> {
+ THttpOutgoingResponsePtr Response;
+
+ TEvHttpOutgoingResponse(THttpOutgoingResponsePtr response)
+ : Response(std::move(response))
+ {}
+ };
+
+ struct TEvHttpConnectionOpened : NActors::TEventLocal<TEvHttpConnectionOpened, EvHttpConnectionOpened> {
+ TString PeerAddress;
+ TActorId ConnectionID;
+
+ TEvHttpConnectionOpened(const TString& peerAddress, const TActorId& connectionID)
+ : PeerAddress(peerAddress)
+ , ConnectionID(connectionID)
+ {}
+ };
+
+ struct TEvHttpConnectionClosed : NActors::TEventLocal<TEvHttpConnectionClosed, EvHttpConnectionClosed> {
+ TActorId ConnectionID;
+ TDeque<THttpIncomingRequestPtr> RecycledRequests;
+
+ TEvHttpConnectionClosed(const TActorId& connectionID)
+ : ConnectionID(connectionID)
+ {}
+
+ TEvHttpConnectionClosed(const TActorId& connectionID, TDeque<THttpIncomingRequestPtr> recycledRequests)
+ : ConnectionID(connectionID)
+ , RecycledRequests(std::move(recycledRequests))
+ {}
+ };
+
+ struct TEvHttpAcceptorClosed : NActors::TEventLocal<TEvHttpAcceptorClosed, EvHttpAcceptorClosed> {
+ TActorId ConnectionID;
+
+ TEvHttpAcceptorClosed(const TActorId& connectionID)
+ : ConnectionID(connectionID)
+ {}
+ };
+
+ struct TEvResolveHostRequest : NActors::TEventLocal<TEvResolveHostRequest, EvResolveHostRequest> {
+ TString Host;
+
+ TEvResolveHostRequest(const TString& host)
+ : Host(host)
+ {}
+ };
+
+ struct TEvResolveHostResponse : NActors::TEventLocal<TEvResolveHostResponse, EvResolveHostResponse> {
+ TString Host;
+ TSockAddrInet6 Address;
+ TString Error;
+
+ TEvResolveHostResponse(const TString& host, const TSockAddrInet6& address)
+ : Host(host)
+ , Address(address)
+ {}
+
+ TEvResolveHostResponse(const TString& error)
+ : Error(error)
+ {}
+ };
+
+ struct TEvReportSensors : NActors::TEventLocal<TEvReportSensors, EvReportSensors> {
+ TString Direction;
+ TString Host;
+ TString Url;
+ TString Status;
+ TDuration Time;
+
+ TEvReportSensors(
+ TStringBuf direction,
+ TStringBuf host,
+ TStringBuf url,
+ TStringBuf status,
+ TDuration time)
+ : Direction(direction)
+ , Host(host)
+ , Url(url)
+ , Status(status)
+ , Time(time)
+ {}
+ };
+};
+
+struct TEndpointInfo {
+ TActorId Proxy;
+ TActorId Owner;
+ TString WorkerName;
+ bool Secure;
+ TSslHelpers::TSslHolder<SSL_CTX> SecureContext;
+};
+
+NActors::IActor* CreateHttpProxy(NMonitoring::TMetricRegistry& sensors);
+NActors::IActor* CreateHttpAcceptorActor(const TActorId& owner, const TActorId& poller);
+NActors::IActor* CreateOutgoingConnectionActor(const TActorId& owner, const TString& host, bool secure, const TActorId& poller);
+NActors::IActor* CreateIncomingConnectionActor(
+ const TEndpointInfo& endpoint,
+ TIntrusivePtr<TSocketDescriptor> socket,
+ THttpConfig::SocketAddressType address,
+ THttpIncomingRequestPtr recycledRequest = nullptr);
+TEvHttpProxy::TEvReportSensors* BuildOutgoingRequestSensors(const THttpOutgoingRequestPtr& request, const THttpIncomingResponsePtr& response);
+TEvHttpProxy::TEvReportSensors* BuildIncomingRequestSensors(const THttpIncomingRequestPtr& request, const THttpOutgoingResponsePtr& response);
+
+}
diff --git a/library/cpp/actors/http/http_proxy_acceptor.cpp b/library/cpp/actors/http/http_proxy_acceptor.cpp
new file mode 100644
index 0000000000..9780541b71
--- /dev/null
+++ b/library/cpp/actors/http/http_proxy_acceptor.cpp
@@ -0,0 +1,135 @@
+#include <util/network/sock.h>
+#include "http_proxy.h"
+#include "http_proxy_ssl.h"
+
+namespace NHttp {
+
+class TAcceptorActor : public NActors::TActor<TAcceptorActor>, public THttpConfig {
+public:
+ using TBase = NActors::TActor<TAcceptorActor>;
+ const TActorId Owner;
+ const TActorId Poller;
+ TIntrusivePtr<TSocketDescriptor> Socket;
+ NActors::TPollerToken::TPtr PollerToken;
+ THashSet<TActorId> Connections;
+ TDeque<THttpIncomingRequestPtr> RecycledRequests;
+ TEndpointInfo Endpoint;
+
+ TAcceptorActor(const TActorId& owner, const TActorId& poller)
+ : NActors::TActor<TAcceptorActor>(&TAcceptorActor::StateInit)
+ , Owner(owner)
+ , Poller(poller)
+ , Socket(new TSocketDescriptor())
+ {
+ // for unit tests :(
+ CheckedSetSockOpt(Socket->Socket, SOL_SOCKET, SO_REUSEADDR, (int)true, "reuse address");
+#ifdef SO_REUSEPORT
+ CheckedSetSockOpt(Socket->Socket, SOL_SOCKET, SO_REUSEPORT, (int)true, "reuse port");
+#endif
+ }
+
+protected:
+ STFUNC(StateListening) {
+ switch (ev->GetTypeRewrite()) {
+ HFunc(NActors::TEvPollerRegisterResult, Handle);
+ HFunc(NActors::TEvPollerReady, Handle);
+ HFunc(TEvHttpProxy::TEvHttpConnectionClosed, Handle);
+ HFunc(TEvHttpProxy::TEvReportSensors, Handle);
+ }
+ }
+
+ STFUNC(StateInit) {
+ switch (ev->GetTypeRewrite()) {
+ HFunc(TEvHttpProxy::TEvAddListeningPort, HandleInit);
+ }
+ }
+
+ void HandleInit(TEvHttpProxy::TEvAddListeningPort::TPtr event, const NActors::TActorContext& ctx) {
+ SocketAddressType bindAddress("::", event->Get()->Port);
+ Endpoint.Owner = ctx.SelfID;
+ Endpoint.Proxy = Owner;
+ Endpoint.WorkerName = event->Get()->WorkerName;
+ Endpoint.Secure = event->Get()->Secure;
+ int err = 0;
+ if (Endpoint.Secure) {
+ if (!event->Get()->SslCertificatePem.empty()) {
+ Endpoint.SecureContext = TSslHelpers::CreateServerContext(event->Get()->SslCertificatePem);
+ } else {
+ Endpoint.SecureContext = TSslHelpers::CreateServerContext(event->Get()->CertificateFile, event->Get()->PrivateKeyFile);
+ }
+ if (Endpoint.SecureContext == nullptr) {
+ err = -1;
+ LOG_WARN_S(ctx, HttpLog, "Failed to construct server security context");
+ }
+ }
+ if (err == 0) {
+ err = Socket->Socket.Bind(&bindAddress);
+ }
+ if (err == 0) {
+ err = Socket->Socket.Listen(LISTEN_QUEUE);
+ if (err == 0) {
+ LOG_INFO_S(ctx, HttpLog, "Listening on " << bindAddress.ToString());
+ SetNonBlock(Socket->Socket);
+ ctx.Send(Poller, new NActors::TEvPollerRegister(Socket, SelfId(), SelfId()));
+ TBase::Become(&TAcceptorActor::StateListening);
+ ctx.Send(event->Sender, new TEvHttpProxy::TEvConfirmListen(bindAddress), 0, event->Cookie);
+ return;
+ }
+ }
+ LOG_WARN_S(ctx, HttpLog, "Failed to listen on " << bindAddress.ToString() << " - retrying...");
+ ctx.ExecutorThread.Schedule(TDuration::Seconds(1), event.Release());
+ }
+
+ void Die(const NActors::TActorContext& ctx) override {
+ ctx.Send(Owner, new TEvHttpProxy::TEvHttpAcceptorClosed(ctx.SelfID));
+ for (const NActors::TActorId& connection : Connections) {
+ ctx.Send(connection, new NActors::TEvents::TEvPoisonPill());
+ }
+ }
+
+ void Handle(NActors::TEvPollerRegisterResult::TPtr ev, const NActors::TActorContext& /*ctx*/) {
+ PollerToken = std::move(ev->Get()->PollerToken);
+ PollerToken->Request(true, false); // request read polling
+ }
+
+ void Handle(NActors::TEvPollerReady::TPtr, const NActors::TActorContext& ctx) {
+ TIntrusivePtr<TSocketDescriptor> socket = new TSocketDescriptor();
+ SocketAddressType addr;
+ int err;
+ while ((err = Socket->Socket.Accept(&socket->Socket, &addr)) == 0) {
+ NActors::IActor* connectionSocket = nullptr;
+ if (RecycledRequests.empty()) {
+ connectionSocket = CreateIncomingConnectionActor(Endpoint, socket, addr);
+ } else {
+ connectionSocket = CreateIncomingConnectionActor(Endpoint, socket, addr, std::move(RecycledRequests.front()));
+ RecycledRequests.pop_front();
+ }
+ NActors::TActorId connectionId = ctx.Register(connectionSocket);
+ ctx.Send(Poller, new NActors::TEvPollerRegister(socket, connectionId, connectionId));
+ Connections.emplace(connectionId);
+ socket = new TSocketDescriptor();
+ }
+ if (err == -EAGAIN || err == -EWOULDBLOCK) { // request poller for further connection polling
+ Y_VERIFY(PollerToken);
+ PollerToken->Request(true, false);
+ }
+ }
+
+ void Handle(TEvHttpProxy::TEvHttpConnectionClosed::TPtr event, const NActors::TActorContext&) {
+ Connections.erase(event->Get()->ConnectionID);
+ for (auto& req : event->Get()->RecycledRequests) {
+ req->Clear();
+ RecycledRequests.push_back(std::move(req));
+ }
+ }
+
+ void Handle(TEvHttpProxy::TEvReportSensors::TPtr event, const NActors::TActorContext& ctx) {
+ ctx.Send(event->Forward(Owner));
+ }
+};
+
+NActors::IActor* CreateHttpAcceptorActor(const TActorId& owner, const TActorId& poller) {
+ return new TAcceptorActor(owner, poller);
+}
+
+}
diff --git a/library/cpp/actors/http/http_proxy_incoming.cpp b/library/cpp/actors/http/http_proxy_incoming.cpp
new file mode 100644
index 0000000000..80fe2af53d
--- /dev/null
+++ b/library/cpp/actors/http/http_proxy_incoming.cpp
@@ -0,0 +1,302 @@
+#include "http_proxy.h"
+#include "http_proxy_sock_impl.h"
+
+namespace NHttp {
+
+using namespace NActors;
+
+template <typename TSocketImpl>
+class TIncomingConnectionActor : public TActor<TIncomingConnectionActor<TSocketImpl>>, public TSocketImpl, virtual public THttpConfig {
+public:
+ using TBase = TActor<TIncomingConnectionActor<TSocketImpl>>;
+ static constexpr bool RecycleRequests = true;
+
+ const TEndpointInfo& Endpoint;
+ SocketAddressType Address;
+ TList<THttpIncomingRequestPtr> Requests;
+ THashMap<THttpIncomingRequestPtr, THttpOutgoingResponsePtr> Responses;
+ THttpIncomingRequestPtr CurrentRequest;
+ THttpOutgoingResponsePtr CurrentResponse;
+ TDeque<THttpIncomingRequestPtr> RecycledRequests;
+
+ THPTimer InactivityTimer;
+ static constexpr TDuration InactivityTimeout = TDuration::Minutes(2);
+ TEvPollerReady* InactivityEvent = nullptr;
+
+ TPollerToken::TPtr PollerToken;
+
+ TIncomingConnectionActor(
+ const TEndpointInfo& endpoint,
+ TIntrusivePtr<TSocketDescriptor> socket,
+ SocketAddressType address,
+ THttpIncomingRequestPtr recycledRequest = nullptr)
+ : TBase(&TIncomingConnectionActor::StateAccepting)
+ , TSocketImpl(std::move(socket))
+ , Endpoint(endpoint)
+ , Address(address)
+ {
+ if (recycledRequest != nullptr) {
+ RecycledRequests.emplace_back(std::move(recycledRequest));
+ }
+ TSocketImpl::SetNonBlock();
+ }
+
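+ // Finished requests are recycled so their buffers can be reused by the next
+ // request on this endpoint instead of being reallocated.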
+ void CleanupRequest(THttpIncomingRequestPtr& request) {
+ if (RecycleRequests) {
+ request->Clear();
+ RecycledRequests.push_back(std::move(request));
+ } else {
+ request = nullptr;
+ }
+ }
+
+ void CleanupResponse(THttpOutgoingResponsePtr& response) {
+ CleanupRequest(response->Request);
+ // TODO: maybe recycle too?
+ response = nullptr;
+ }
+
+ TAutoPtr<IEventHandle> AfterRegister(const TActorId& self, const TActorId& parent) override {
+ return new IEventHandle(self, parent, new TEvents::TEvBootstrap());
+ }
+
+ void Die(const TActorContext& ctx) override {
+ ctx.Send(Endpoint.Owner, new TEvHttpProxy::TEvHttpConnectionClosed(ctx.SelfID, std::move(RecycledRequests)));
+ TSocketImpl::Shutdown();
+ TBase::Die(ctx);
+ }
+
+protected:
+ void Bootstrap(const TActorContext& ctx) {
+ InactivityTimer.Reset();
+ ctx.Schedule(InactivityTimeout, InactivityEvent = new TEvPollerReady(nullptr, false, false));
+ LOG_DEBUG_S(ctx, HttpLog, "(#" << TSocketImpl::GetRawSocket() << "," << Address << ") incoming connection opened");
+ OnAccept(ctx);
+ }
+
+ void OnAccept(const NActors::TActorContext& ctx) {
+ int res;
+ bool read = false, write = false;
+ if ((res = TSocketImpl::OnAccept(Endpoint, read, write)) != 1) {
+ if (-res == EAGAIN) {
+ if (PollerToken) {
+ PollerToken->Request(read, write);
+ }
+ return; // wait for further notifications
+ } else {
+ LOG_ERROR_S(ctx, HttpLog, "(#" << TSocketImpl::GetRawSocket() << "," << Address << ") connection closed - error in Accept: " << strerror(-res));
+ return Die(ctx);
+ }
+ }
+ TBase::Become(&TIncomingConnectionActor::StateConnected);
+ ctx.Send(ctx.SelfID, new TEvPollerReady(nullptr, true, true));
+ }
+
+ void HandleAccepting(TEvPollerRegisterResult::TPtr ev, const NActors::TActorContext& ctx) {
+ PollerToken = std::move(ev->Get()->PollerToken);
+ OnAccept(ctx);
+ }
+
+ void HandleAccepting(NActors::TEvPollerReady::TPtr, const NActors::TActorContext& ctx) {
+ OnAccept(ctx);
+ }
+
+ void HandleConnected(TEvPollerReady::TPtr event, const TActorContext& ctx) {
+ if (event->Get()->Read) {
+ for (;;) {
+ if (CurrentRequest == nullptr) {
+ if (RecycleRequests && !RecycledRequests.empty()) {
+ CurrentRequest = std::move(RecycledRequests.front());
+ RecycledRequests.pop_front();
+ } else {
+ CurrentRequest = new THttpIncomingRequest();
+ }
+ CurrentRequest->Address = Address;
+ CurrentRequest->WorkerName = Endpoint.WorkerName;
+ CurrentRequest->Secure = Endpoint.Secure;
+ }
+ if (!CurrentRequest->EnsureEnoughSpaceAvailable()) {
+ LOG_DEBUG_S(ctx, HttpLog, "(#" << TSocketImpl::GetRawSocket() << "," << Address << ") connection closed - not enough space available");
+ return Die(ctx);
+ }
+ ssize_t need = CurrentRequest->Avail();
+ bool read = false, write = false;
+ ssize_t res = TSocketImpl::Recv(CurrentRequest->Pos(), need, read, write);
+ if (res > 0) {
+ InactivityTimer.Reset();
+ CurrentRequest->Advance(res);
+ if (CurrentRequest->IsDone()) {
+ Requests.emplace_back(CurrentRequest);
+ CurrentRequest->Timer.Reset();
+ if (CurrentRequest->IsReady()) {
+ LOG_DEBUG_S(ctx, HttpLog, "(#" << TSocketImpl::GetRawSocket() << "," << Address << ") -> (" << CurrentRequest->Method << " " << CurrentRequest->URL << ")");
+ ctx.Send(Endpoint.Proxy, new TEvHttpProxy::TEvHttpIncomingRequest(CurrentRequest));
+ CurrentRequest = nullptr;
+ } else if (CurrentRequest->IsError()) {
+ LOG_DEBUG_S(ctx, HttpLog, "(#" << TSocketImpl::GetRawSocket() << "," << Address << ") -! (" << CurrentRequest->Method << " " << CurrentRequest->URL << ")");
+ bool success = Respond(CurrentRequest->CreateResponseBadRequest(), ctx);
+ if (!success) {
+ return;
+ }
+ CurrentRequest = nullptr;
+ }
+ }
+ } else if (-res == EAGAIN || -res == EWOULDBLOCK) {
+ if (PollerToken) {
+ if (!read && !write) {
+ read = true;
+ }
+ PollerToken->Request(read, write);
+ }
+ break;
+ } else if (-res == EINTR) {
+ continue;
+ } else if (!res) {
+ // connection closed
+ LOG_DEBUG_S(ctx, HttpLog, "(#" << TSocketImpl::GetRawSocket() << "," << Address << ") connection closed");
+ return Die(ctx);
+ } else {
+ LOG_ERROR_S(ctx, HttpLog, "(#" << TSocketImpl::GetRawSocket() << "," << Address << ") connection closed - error in Receive: " << strerror(-res));
+ return Die(ctx);
+ }
+ }
+ }
+ // The inactivity probe is scheduled with Read == false, so this check has to
+ // live outside the Read branch above, otherwise it would never run.
+ if (event->Get() == InactivityEvent) {
+ const TDuration passed = TDuration::Seconds(std::abs(InactivityTimer.Passed()));
+ if (passed >= InactivityTimeout) {
+ LOG_DEBUG_S(ctx, HttpLog, "(#" << TSocketImpl::GetRawSocket() << "," << Address << ") connection closed by inactivity timeout");
+ return Die(ctx); // timeout
+ } else {
+ ctx.Schedule(InactivityTimeout - passed, InactivityEvent = new TEvPollerReady(nullptr, false, false));
+ }
+ }
+ if (event->Get()->Write) {
+ FlushOutput(ctx);
+ }
+ }
+
+ void HandleConnected(TEvPollerRegisterResult::TPtr ev, const TActorContext& /*ctx*/) {
+ PollerToken = std::move(ev->Get()->PollerToken);
+ PollerToken->Request(true, true);
+ }
+
+ void HandleConnected(TEvHttpProxy::TEvHttpOutgoingResponse::TPtr event, const TActorContext& ctx) {
+ Respond(event->Get()->Response, ctx);
+ }
+
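+ // HTTP pipelining: responses must leave the socket in request order, so only
+ // the response for the oldest pending request is flushed now; later ones are
+ // parked in Responses until their turn.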
+ bool Respond(THttpOutgoingResponsePtr response, const TActorContext& ctx) {
+ THttpIncomingRequestPtr request = response->GetRequest();
+ response->Finish();
+ LOG_DEBUG_S(ctx, HttpLog, "(#" << TSocketImpl::GetRawSocket() << "," << Address << ") <- (" << response->Status << " " << response->Message << ")");
+ if (response->Status != "200" && response->Status != "404") {
+ static constexpr size_t MAX_LOGGED_SIZE = 1024;
+ LOG_DEBUG_S(ctx, HttpLog,
+ "(#"
+ << TSocketImpl::GetRawSocket()
+ << ","
+ << Address
+ << ") Request: "
+ << request->GetObfuscatedData().substr(0, MAX_LOGGED_SIZE));
+ LOG_DEBUG_S(ctx, HttpLog,
+ "(#"
+ << TSocketImpl::GetRawSocket()
+ << ","
+ << Address
+ << ") Response: "
+ << TString(response->GetRawData()).substr(0, MAX_LOGGED_SIZE));
+ }
+ THolder<TEvHttpProxy::TEvReportSensors> sensors(BuildIncomingRequestSensors(request, response));
+ ctx.Send(Endpoint.Owner, sensors.Release());
+ if (request == Requests.front() && CurrentResponse == nullptr) {
+ CurrentResponse = response;
+ return FlushOutput(ctx);
+ } else {
+ // we are ahead of our pipeline
+ Responses.emplace(request, response);
+ return true;
+ }
+ }
+
+ bool FlushOutput(const TActorContext& ctx) {
+ while (CurrentResponse != nullptr) {
+ size_t size = CurrentResponse->Size();
+ if (size == 0) {
+ Y_VERIFY(Requests.front() == CurrentResponse->GetRequest());
+ bool close = CurrentResponse->IsConnectionClose();
+ Requests.pop_front();
+ CleanupResponse(CurrentResponse);
+ if (!Requests.empty()) {
+ auto it = Responses.find(Requests.front());
+ if (it != Responses.end()) {
+ CurrentResponse = it->second;
+ Responses.erase(it);
+ continue;
+ } else {
+ LOG_ERROR_S(ctx, HttpLog, "(#" << TSocketImpl::GetRawSocket() << "," << Address << ") connection closed - FlushOutput request not found");
+ Die(ctx);
+ return false;
+ }
+ } else {
+ if (close) {
+ LOG_DEBUG_S(ctx, HttpLog, "(#" << TSocketImpl::GetRawSocket() << "," << Address << ") connection closed");
+ Die(ctx);
+ return false;
+ } else {
+ continue;
+ }
+ }
+ }
+ bool read = false, write = false;
+ ssize_t res = TSocketImpl::Send(CurrentResponse->Data(), size, read, write);
+ if (res > 0) {
+ CurrentResponse->ChopHead(res);
+ } else if (-res == EINTR) {
+ continue;
+ } else if (-res == EAGAIN || -res == EWOULDBLOCK) {
+ if (PollerToken) {
+ if (!read && !write) {
+ write = true;
+ }
+ PollerToken->Request(read, write);
+ }
+ break;
+ } else {
+ CleanupResponse(CurrentResponse);
+ LOG_ERROR_S(ctx, HttpLog, "(#" << TSocketImpl::GetRawSocket() << "," << Address << ") connection closed - error in FlushOutput: " << strerror(-res));
+ Die(ctx);
+ return false;
+ }
+ }
+ return true;
+ }
+
+ STFUNC(StateAccepting) {
+ switch (ev->GetTypeRewrite()) {
+ CFunc(TEvents::TEvBootstrap::EventType, Bootstrap);
+ HFunc(TEvPollerReady, HandleAccepting);
+ HFunc(TEvPollerRegisterResult, HandleAccepting);
+ }
+ }
+
+ STFUNC(StateConnected) {
+ switch (ev->GetTypeRewrite()) {
+ HFunc(TEvPollerReady, HandleConnected);
+ HFunc(TEvHttpProxy::TEvHttpOutgoingResponse, HandleConnected);
+ HFunc(TEvPollerRegisterResult, HandleConnected);
+ }
+ }
+};
+
+IActor* CreateIncomingConnectionActor(
+ const TEndpointInfo& endpoint,
+ TIntrusivePtr<TSocketDescriptor> socket,
+ THttpConfig::SocketAddressType address,
+ THttpIncomingRequestPtr recycledRequest) {
+ if (endpoint.Secure) {
+ return new TIncomingConnectionActor<TSecureSocketImpl>(endpoint, std::move(socket), address, std::move(recycledRequest));
+ } else {
+ return new TIncomingConnectionActor<TPlainSocketImpl>(endpoint, std::move(socket), address, std::move(recycledRequest));
+ }
+}
+
+}
diff --git a/library/cpp/actors/http/http_proxy_outgoing.cpp b/library/cpp/actors/http/http_proxy_outgoing.cpp
new file mode 100644
index 0000000000..d9189dba8a
--- /dev/null
+++ b/library/cpp/actors/http/http_proxy_outgoing.cpp
@@ -0,0 +1,298 @@
+#include "http_proxy.h"
+#include "http_proxy_sock_impl.h"
+
+namespace NHttp {
+
+template <typename TSocketImpl>
+class TOutgoingConnectionActor : public NActors::TActor<TOutgoingConnectionActor<TSocketImpl>>, public TSocketImpl, virtual public THttpConfig {
+public:
+ using TBase = NActors::TActor<TOutgoingConnectionActor<TSocketImpl>>;
+ using TSelf = TOutgoingConnectionActor<TSocketImpl>;
+ const TActorId Owner;
+ const TActorId Poller;
+ SocketAddressType Address;
+ TString Host;
+ TActorId RequestOwner;
+ THttpOutgoingRequestPtr Request;
+ THttpIncomingResponsePtr Response;
+ TInstant LastActivity;
+ TDuration ConnectionTimeout = CONNECTION_TIMEOUT;
+ NActors::TPollerToken::TPtr PollerToken;
+
+ TOutgoingConnectionActor(const TActorId& owner, const TString& host, const TActorId& poller)
+ : TBase(&TSelf::StateWaiting)
+ , Owner(owner)
+ , Poller(poller)
+ , Host(host)
+ {
+ TSocketImpl::SetNonBlock();
+ TSocketImpl::SetTimeout(SOCKET_TIMEOUT);
+ }
+
+ void Die(const NActors::TActorContext& ctx) override {
+ ctx.Send(Owner, new TEvHttpProxy::TEvHttpConnectionClosed(ctx.SelfID));
+ TSocketImpl::Shutdown(); // to avoid errors when connection already closed
+ TBase::Die(ctx);
+ }
+
+ void ReplyAndDie(const NActors::TActorContext& ctx) {
+ LOG_DEBUG_S(ctx, HttpLog, "(#" << TSocketImpl::GetRawSocket() << "," << Address << ") -> (" << Response->Status << " " << Response->Message << ")");
+ ctx.Send(RequestOwner, new TEvHttpProxy::TEvHttpIncomingResponse(Request, Response));
+ RequestOwner = TActorId();
+ THolder<TEvHttpProxy::TEvReportSensors> sensors(BuildOutgoingRequestSensors(Request, Response));
+ ctx.Send(Owner, sensors.Release());
+ LOG_DEBUG_S(ctx, HttpLog, "(#" << TSocketImpl::GetRawSocket() << "," << Address << ") connection closed");
+ Die(ctx);
+ }
+
+ void ReplyErrorAndDie(const NActors::TActorContext& ctx, const TString& error) {
+ LOG_ERROR_S(ctx, HttpLog, "(#" << TSocketImpl::GetRawSocket() << "," << Address << ") connection closed with error: " << error);
+ if (RequestOwner) {
+ ctx.Send(RequestOwner, new TEvHttpProxy::TEvHttpIncomingResponse(Request, Response, error));
+ RequestOwner = TActorId();
+ THolder<TEvHttpProxy::TEvReportSensors> sensors(BuildOutgoingRequestSensors(Request, Response));
+ ctx.Send(Owner, sensors.Release());
+ Die(ctx);
+ }
+ }
+
+protected:
+ void FailConnection(const NActors::TActorContext& ctx, const TString& error) {
+ if (Request) {
+ return ReplyErrorAndDie(ctx, error);
+ }
+ return TBase::Become(&TOutgoingConnectionActor::StateFailed);
+ }
+
+ void Connect(const NActors::TActorContext& ctx) {
+ LOG_DEBUG_S(ctx, HttpLog, "(#" << TSocketImpl::GetRawSocket() << "," << Address << ") connecting");
+ int res = TSocketImpl::Connect(Address);
+ RegisterPoller(ctx);
+ switch (-res) {
+ case 0:
+ return OnConnect(ctx);
+ case EINPROGRESS:
+ case EAGAIN:
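+ // non-blocking connect in progress: wait for the poller to signal readiness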
+ return TBase::Become(&TOutgoingConnectionActor::StateConnecting);
+ default:
+ return ReplyErrorAndDie(ctx, strerror(-res));
+ }
+ }
+
+ void FlushOutput(const NActors::TActorContext& ctx) {
+ if (Request != nullptr) {
+ Request->Finish();
+ while (auto size = Request->Size()) {
+ bool read = false, write = false;
+ ssize_t res = TSocketImpl::Send(Request->Data(), size, read, write);
+ if (res > 0) {
+ Request->ChopHead(res);
+ } else if (-res == EINTR) {
+ continue;
+ } else if (-res == EAGAIN || -res == EWOULDBLOCK) {
+ if (PollerToken) {
+ if (!read && !write) {
+ write = true;
+ }
+ PollerToken->Request(read, write);
+ }
+ break;
+ } else {
+ if (!res) {
+ ReplyAndDie(ctx);
+ } else {
+ ReplyErrorAndDie(ctx, strerror(-res));
+ }
+ break;
+ }
+ }
+ }
+ }
+
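+ // reads all currently available data into Response, re-arming the poller
+ // on EAGAIN and replying to the request owner once the response is parsed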
+ void PullInput(const NActors::TActorContext& ctx) {
+ for (;;) {
+ if (Response == nullptr) {
+ Response = new THttpIncomingResponse(Request);
+ }
+ if (!Response->EnsureEnoughSpaceAvailable()) {
+ return ReplyErrorAndDie(ctx, "Not enough space in socket buffer");
+ }
+ bool read = false, write = false;
+ ssize_t res = TSocketImpl::Recv(Response->Pos(), Response->Avail(), read, write);
+ if (res > 0) {
+ Response->Advance(res);
+ if (Response->IsDone() && Response->IsReady()) {
+ return ReplyAndDie(ctx);
+ }
+ } else if (-res == EINTR) {
+ continue;
+ } else if (-res == EAGAIN || -res == EWOULDBLOCK) {
+ if (PollerToken) {
+ if (!read && !write) {
+ read = true;
+ }
+ PollerToken->Request(read, write);
+ }
+ return;
+ } else {
+ if (!res) {
+ Response->ConnectionClosed();
+ }
+ if (Response->IsDone() && Response->IsReady()) {
+ return ReplyAndDie(ctx);
+ }
+ return ReplyErrorAndDie(ctx, strerror(-res));
+ }
+ }
+ }
+
+ void RegisterPoller(const NActors::TActorContext& ctx) {
+ ctx.Send(Poller, new NActors::TEvPollerRegister(TSocketImpl::Socket, ctx.SelfID, ctx.SelfID));
+ }
+
+ void OnConnect(const NActors::TActorContext& ctx) {
+ bool read = false, write = false;
+ if (int res = TSocketImpl::OnConnect(read, write); res != 1) {
+ if (-res == EAGAIN) {
+ if (PollerToken) {
+ PollerToken->Request(read, write);
+ }
+ return;
+ } else {
+ return ReplyErrorAndDie(ctx, strerror(-res));
+ }
+ }
+ LOG_DEBUG_S(ctx, HttpLog, "(#" << TSocketImpl::GetRawSocket() << "," << Address << ") outgoing connection opened");
+ TBase::Become(&TOutgoingConnectionActor::StateConnected);
+ LOG_DEBUG_S(ctx, HttpLog, "(#" << TSocketImpl::GetRawSocket() << "," << Address << ") <- (" << Request->Method << " " << Request->URL << ")");
+ ctx.Send(ctx.SelfID, new NActors::TEvPollerReady(nullptr, true, true));
+ }
+
+ void HandleResolving(TEvHttpProxy::TEvResolveHostResponse::TPtr event, const NActors::TActorContext& ctx) {
+ LastActivity = ctx.Now();
+ if (!event->Get()->Error.empty()) {
+ return FailConnection(ctx, event->Get()->Error);
+ }
+ Address = event->Get()->Address;
+ if (Address.GetPort() == 0) {
+ Address.SetPort(Request->Secure ? 443 : 80);
+ }
+ Connect(ctx);
+ }
+
+ void HandleConnecting(NActors::TEvPollerReady::TPtr, const NActors::TActorContext& ctx) {
+ LastActivity = ctx.Now();
+ int res = TSocketImpl::GetError();
+ if (res == 0) {
+ OnConnect(ctx);
+ } else {
+ FailConnection(ctx, TStringBuilder() << strerror(res));
+ }
+ }
+
+ void HandleConnecting(NActors::TEvPollerRegisterResult::TPtr ev, const NActors::TActorContext& ctx) {
+ PollerToken = std::move(ev->Get()->PollerToken);
+ LastActivity = ctx.Now();
+ int res = TSocketImpl::GetError();
+ if (res == 0) {
+ OnConnect(ctx);
+ } else {
+ FailConnection(ctx, TStringBuilder() << strerror(res));
+ }
+ }
+
+ void HandleWaiting(TEvHttpProxy::TEvHttpOutgoingRequest::TPtr event, const NActors::TActorContext& ctx) {
+ LastActivity = ctx.Now();
+ Request = std::move(event->Get()->Request);
+ Host = Request->Host;
+ LOG_DEBUG_S(ctx, HttpLog, "(#" << TSocketImpl::GetRawSocket() << ") resolving " << Host);
+ Request->Timer.Reset();
+ RequestOwner = event->Sender;
+ ctx.Send(Owner, new TEvHttpProxy::TEvResolveHostRequest(Host));
+ if (event->Get()->Timeout) {
+ ConnectionTimeout = event->Get()->Timeout;
+ TSocketImpl::SetTimeout(ConnectionTimeout);
+ }
+ ctx.Schedule(ConnectionTimeout, new NActors::TEvents::TEvWakeup());
+ LastActivity = ctx.Now();
+ TBase::Become(&TOutgoingConnectionActor::StateResolving);
+ }
+
+ void HandleConnected(NActors::TEvPollerReady::TPtr event, const NActors::TActorContext& ctx) {
+ LastActivity = ctx.Now();
+ if (event->Get()->Read) {
+ PullInput(ctx);
+ }
+ if (event->Get()->Write) {
+ FlushOutput(ctx);
+ }
+ }
+
+ void HandleConnected(NActors::TEvPollerRegisterResult::TPtr ev, const NActors::TActorContext& ctx) {
+ PollerToken = std::move(ev->Get()->PollerToken);
+ LastActivity = ctx.Now();
+ PullInput(ctx);
+ FlushOutput(ctx);
+ }
+
+ void HandleFailed(TEvHttpProxy::TEvHttpOutgoingRequest::TPtr event, const NActors::TActorContext& ctx) {
+ Request = std::move(event->Get()->Request);
+ RequestOwner = event->Sender;
+ ReplyErrorAndDie(ctx, "Failed");
+ }
+
+ void HandleTimeout(const NActors::TActorContext& ctx) {
+ TDuration inactivityTime = ctx.Now() - LastActivity;
+ if (inactivityTime >= ConnectionTimeout) {
+ FailConnection(ctx, "Connection timed out");
+ } else {
+ ctx.Schedule(Min(ConnectionTimeout - inactivityTime, TDuration::MilliSeconds(100)), new NActors::TEvents::TEvWakeup());
+ }
+ }
+
+ STFUNC(StateWaiting) {
+ switch (ev->GetTypeRewrite()) {
+ HFunc(TEvHttpProxy::TEvHttpOutgoingRequest, HandleWaiting);
+ CFunc(NActors::TEvents::TEvWakeup::EventType, HandleTimeout);
+ }
+ }
+
+ STFUNC(StateResolving) {
+ switch (ev->GetTypeRewrite()) {
+ HFunc(TEvHttpProxy::TEvResolveHostResponse, HandleResolving);
+ CFunc(NActors::TEvents::TEvWakeup::EventType, HandleTimeout);
+ }
+ }
+
+ STFUNC(StateConnecting) {
+ switch (ev->GetTypeRewrite()) {
+ HFunc(NActors::TEvPollerReady, HandleConnecting);
+ CFunc(NActors::TEvents::TEvWakeup::EventType, HandleTimeout);
+ HFunc(NActors::TEvPollerRegisterResult, HandleConnecting);
+ }
+ }
+
+ STFUNC(StateConnected) {
+ switch (ev->GetTypeRewrite()) {
+ HFunc(NActors::TEvPollerReady, HandleConnected);
+ CFunc(NActors::TEvents::TEvWakeup::EventType, HandleTimeout);
+ HFunc(NActors::TEvPollerRegisterResult, HandleConnected);
+ }
+ }
+
+ STFUNC(StateFailed) {
+ switch (ev->GetTypeRewrite()) {
+ HFunc(TEvHttpProxy::TEvHttpOutgoingRequest, HandleFailed);
+ }
+ }
+};
+
+NActors::IActor* CreateOutgoingConnectionActor(const TActorId& owner, const TString& host, bool secure, const TActorId& poller) {
+ if (secure) {
+ return new TOutgoingConnectionActor<TSecureSocketImpl>(owner, host, poller);
+ } else {
+ return new TOutgoingConnectionActor<TPlainSocketImpl>(owner, host, poller);
+ }
+}
+
+}
diff --git a/library/cpp/actors/http/http_proxy_sock_impl.h b/library/cpp/actors/http/http_proxy_sock_impl.h
new file mode 100644
index 0000000000..bf8c71d05a
--- /dev/null
+++ b/library/cpp/actors/http/http_proxy_sock_impl.h
@@ -0,0 +1,262 @@
+#pragma once
+
+#include "http.h"
+#include "http_proxy.h"
+
+namespace NHttp {
+
+struct TPlainSocketImpl : virtual public THttpConfig {
+ TIntrusivePtr<TSocketDescriptor> Socket;
+
+ TPlainSocketImpl()
+ : Socket(new TSocketDescriptor())
+ {}
+
+ TPlainSocketImpl(TIntrusivePtr<TSocketDescriptor> socket)
+ : Socket(std::move(socket))
+ {}
+
+ SOCKET GetRawSocket() const {
+ return static_cast<SOCKET>(Socket->Socket);
+ }
+
+ void SetNonBlock(bool nonBlock = true) noexcept {
+ try {
+ ::SetNonBlock(Socket->Socket, nonBlock);
+ }
+ catch (const yexception&) {
+ }
+ }
+
+ void SetTimeout(TDuration timeout) noexcept {
+ try {
+ ::SetSocketTimeout(Socket->Socket, timeout.Seconds(), timeout.MilliSecondsOfSecond());
+ }
+ catch (const yexception&) {
+ }
+ }
+
+ void Shutdown() {
+ //Socket->Socket.ShutDown(SHUT_RDWR); // KIKIMR-3895
+ ::shutdown(Socket->Socket, SHUT_RDWR);
+ }
+
+ int Connect(const SocketAddressType& address) {
+ return Socket->Socket.Connect(&address);
+ }
+
+ static constexpr int OnConnect(bool&, bool&) {
+ return 1;
+ }
+
+ static constexpr int OnAccept(const TEndpointInfo&, bool&, bool&) {
+ return 1;
+ }
+
+ bool IsGood() {
+ int res = 0; // initialized in case GetSockOpt fails to fill it
+ GetSockOpt(Socket->Socket, SOL_SOCKET, SO_ERROR, res);
+ return res == 0;
+ }
+
+ int GetError() {
+ int res = 0; // initialized in case GetSockOpt fails to fill it
+ GetSockOpt(Socket->Socket, SOL_SOCKET, SO_ERROR, res);
+ return res;
+ }
+
+ ssize_t Send(const void* data, size_t size, bool&, bool&) {
+ return Socket->Socket.Send(data, size);
+ }
+
+ ssize_t Recv(void* data, size_t size, bool&, bool&) {
+ return Socket->Socket.Recv(data, size);
+ }
+};
+
+struct TSecureSocketImpl : TPlainSocketImpl, TSslHelpers {
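+ // custom OpenSSL BIO that bridges SSL I/O to the non-blocking socket;
+ // EAGAIN from the socket is surfaced to OpenSSL as a retry condition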
+ static TSecureSocketImpl* IO(BIO* bio) noexcept {
+ return static_cast<TSecureSocketImpl*>(BIO_get_data(bio));
+ }
+
+ static int IoWrite(BIO* bio, const char* data, int dlen) noexcept {
+ BIO_clear_retry_flags(bio);
+ int res = IO(bio)->Socket->Socket.Send(data, dlen);
+ if (-res == EAGAIN) {
+ BIO_set_retry_write(bio);
+ }
+ return res;
+ }
+
+ static int IoRead(BIO* bio, char* data, int dlen) noexcept {
+ BIO_clear_retry_flags(bio);
+ int res = IO(bio)->Socket->Socket.Recv(data, dlen);
+ if (-res == EAGAIN) {
+ BIO_set_retry_read(bio);
+ }
+ return res;
+ }
+
+ static int IoPuts(BIO* bio, const char* buf) noexcept {
+ Y_UNUSED(bio);
+ Y_UNUSED(buf);
+ return -2;
+ }
+
+ static int IoGets(BIO* bio, char* buf, int size) noexcept {
+ Y_UNUSED(bio);
+ Y_UNUSED(buf);
+ Y_UNUSED(size);
+ return -2;
+ }
+
+ static long IoCtrl(BIO* bio, int cmd, long larg, void* parg) noexcept {
+ Y_UNUSED(larg);
+ Y_UNUSED(parg);
+
+ if (cmd == BIO_CTRL_FLUSH) {
+ IO(bio)->Flush();
+ return 1;
+ }
+
+ return -2;
+ }
+
+ static int IoCreate(BIO* bio) noexcept {
+ BIO_set_data(bio, nullptr);
+ BIO_set_init(bio, 1);
+ return 1;
+ }
+
+ static int IoDestroy(BIO* bio) noexcept {
+ BIO_set_data(bio, nullptr);
+ BIO_set_init(bio, 0);
+ return 1;
+ }
+
+ static BIO_METHOD* CreateIoMethod() {
+ BIO_METHOD* method = BIO_meth_new(BIO_get_new_index() | BIO_TYPE_SOURCE_SINK, "SecureSocketImpl");
+ BIO_meth_set_write(method, IoWrite);
+ BIO_meth_set_read(method, IoRead);
+ BIO_meth_set_puts(method, IoPuts);
+ BIO_meth_set_gets(method, IoGets);
+ BIO_meth_set_ctrl(method, IoCtrl);
+ BIO_meth_set_create(method, IoCreate);
+ BIO_meth_set_destroy(method, IoDestroy);
+ return method;
+ }
+
+ static BIO_METHOD* IoMethod() {
+ static BIO_METHOD* method = CreateIoMethod();
+ return method;
+ }
+
+ TSslHolder<BIO> Bio;
+ TSslHolder<SSL_CTX> Ctx;
+ TSslHolder<SSL> Ssl;
+
+ TSecureSocketImpl() = default;
+
+ TSecureSocketImpl(TIntrusivePtr<TSocketDescriptor> socket)
+ : TPlainSocketImpl(std::move(socket))
+ {}
+
+ void InitClientSsl() {
+ Bio.Reset(BIO_new(IoMethod()));
+ BIO_set_data(Bio.Get(), this);
+ BIO_set_nbio(Bio.Get(), 1);
+ Ctx = CreateClientContext();
+ Ssl = ConstructSsl(Ctx.Get(), Bio.Get());
+ SSL_set_connect_state(Ssl.Get());
+ }
+
+ void InitServerSsl(SSL_CTX* ctx) {
+ Bio.Reset(BIO_new(IoMethod()));
+ BIO_set_data(Bio.Get(), this);
+ BIO_set_nbio(Bio.Get(), 1);
+ Ssl = ConstructSsl(ctx, Bio.Get());
+ SSL_set_accept_state(Ssl.Get());
+ }
+
+ void Flush() {}
+
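+ // translate SSL_ERROR_WANT_READ/WANT_WRITE into the -EAGAIN plus
+ // read/write flag convention used by TPlainSocketImpl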
+ ssize_t Send(const void* data, size_t size, bool& read, bool& write) {
+ ssize_t res = SSL_write(Ssl.Get(), data, size);
+ if (res < 0) {
+ res = SSL_get_error(Ssl.Get(), res);
+ switch(res) {
+ case SSL_ERROR_WANT_READ:
+ read = true;
+ return -EAGAIN;
+ case SSL_ERROR_WANT_WRITE:
+ write = true;
+ return -EAGAIN;
+ default:
+ return -EIO;
+ }
+ }
+ return res;
+ }
+
+ ssize_t Recv(void* data, size_t size, bool& read, bool& write) {
+ ssize_t res = SSL_read(Ssl.Get(), data, size);
+ if (res < 0) {
+ res = SSL_get_error(Ssl.Get(), res);
+ switch(res) {
+ case SSL_ERROR_WANT_READ:
+ read = true;
+ return -EAGAIN;
+ case SSL_ERROR_WANT_WRITE:
+ write = true;
+ return -EAGAIN;
+ default:
+ return -EIO;
+ }
+ }
+ return res;
+ }
+
+ int OnConnect(bool& read, bool& write) {
+ if (!Ssl) {
+ InitClientSsl();
+ }
+ int res = SSL_connect(Ssl.Get());
+ if (res <= 0) {
+ res = SSL_get_error(Ssl.Get(), res);
+ switch(res) {
+ case SSL_ERROR_WANT_READ:
+ read = true;
+ return -EAGAIN;
+ case SSL_ERROR_WANT_WRITE:
+ write = true;
+ return -EAGAIN;
+ default:
+ return -EIO;
+ }
+ }
+ return res;
+ }
+
+ int OnAccept(const TEndpointInfo& endpoint, bool& read, bool& write) {
+ if (!Ssl) {
+ InitServerSsl(endpoint.SecureContext.Get());
+ }
+ int res = SSL_accept(Ssl.Get());
+ if (res <= 0) {
+ res = SSL_get_error(Ssl.Get(), res);
+ switch(res) {
+ case SSL_ERROR_WANT_READ:
+ read = true;
+ return -EAGAIN;
+ case SSL_ERROR_WANT_WRITE:
+ write = true;
+ return -EAGAIN;
+ default:
+ return -EIO;
+ }
+ }
+ return res;
+ }
+};
+
+}
diff --git a/library/cpp/actors/http/http_proxy_ssl.h b/library/cpp/actors/http/http_proxy_ssl.h
new file mode 100644
index 0000000000..ffce12997f
--- /dev/null
+++ b/library/cpp/actors/http/http_proxy_ssl.h
@@ -0,0 +1,131 @@
+#pragma once
+
+#include <openssl/bio.h>
+#include <openssl/ssl.h>
+#include <openssl/err.h>
+#include <openssl/tls1.h>
+
+namespace NHttp {
+
+struct TSslHelpers {
+ struct TSslDestroy {
+ static void Destroy(SSL_CTX* ctx) noexcept {
+ SSL_CTX_free(ctx);
+ }
+
+ static void Destroy(SSL* ssl) noexcept {
+ SSL_free(ssl);
+ }
+
+ static void Destroy(X509* cert) noexcept {
+ X509_free(cert);
+ }
+
+ static void Destroy(EVP_PKEY* pkey) noexcept {
+ EVP_PKEY_free(pkey);
+ }
+
+ static void Destroy(BIO* bio) noexcept {
+ BIO_free(bio);
+ }
+ };
+
+ template <typename T>
+ using TSslHolder = THolder<T, TSslDestroy>;
+
+ static TSslHolder<SSL_CTX> CreateSslCtx(const SSL_METHOD* method) {
+ TSslHolder<SSL_CTX> ctx(SSL_CTX_new(method));
+
+ if (ctx) {
+ SSL_CTX_set_options(ctx.Get(), SSL_OP_NO_SSLv2);
+ SSL_CTX_set_options(ctx.Get(), SSL_OP_NO_SSLv3);
+ SSL_CTX_set_options(ctx.Get(), SSL_OP_MICROSOFT_SESS_ID_BUG);
+ SSL_CTX_set_options(ctx.Get(), SSL_OP_NETSCAPE_CHALLENGE_BUG);
+ }
+
+ return ctx;
+ }
+
+ static TSslHolder<SSL_CTX> CreateClientContext() {
+ return CreateSslCtx(SSLv23_client_method());
+ }
+
+ static TSslHolder<SSL_CTX> CreateServerContext(const TString& certificate, const TString& key) {
+ TSslHolder<SSL_CTX> ctx = CreateSslCtx(SSLv23_server_method());
+ SSL_CTX_set_ecdh_auto(ctx.Get(), 1);
+ int res;
+ res = SSL_CTX_use_certificate_chain_file(ctx.Get(), certificate.c_str());
+ if (res < 0) {
+ // TODO(xenoxeno): more diagnostics?
+ return nullptr;
+ }
+ res = SSL_CTX_use_PrivateKey_file(ctx.Get(), key.c_str(), SSL_FILETYPE_PEM);
+ if (res < 0) {
+ // TODO(xenoxeno): more diagnostics?
+ return nullptr;
+ }
+ return ctx;
+ }
+
+ static bool LoadX509Chain(TSslHolder<SSL_CTX>& ctx, const TString& pem) {
+ TSslHolder<BIO> bio(BIO_new_mem_buf(pem.c_str(), pem.size()));
+ if (bio == nullptr) {
+ return false;
+ }
+ TSslHolder<X509> cert(PEM_read_bio_X509_AUX(bio.Get(), nullptr, nullptr, nullptr));
+ if (cert == nullptr) {
+ return false;
+ }
+ if (SSL_CTX_use_certificate(ctx.Get(), cert.Get()) <= 0) { // SSL_CTX_use_certificate takes its own reference
+ return false;
+ }
+ SSL_CTX_clear_chain_certs(ctx.Get());
+ while (true) {
+ TSslHolder<X509> ca(PEM_read_bio_X509(bio.Get(), nullptr, nullptr, nullptr));
+ if (ca == nullptr) {
+ break;
+ }
+ if (!SSL_CTX_add0_chain_cert(ctx.Get(), ca.Release())) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ static bool LoadPrivateKey(TSslHolder<SSL_CTX>& ctx, const TString& pem) {
+ TSslHolder<BIO> bio(BIO_new_mem_buf(pem.c_str(), pem.size()));
+ if (bio == nullptr) {
+ return false;
+ }
+ TSslHolder<EVP_PKEY> pkey(PEM_read_bio_PrivateKey(bio.Get(), nullptr, nullptr, nullptr));
+ if (pkey == nullptr) {
+ return false;
+ }
+ if (SSL_CTX_use_PrivateKey(ctx.Get(), pkey.Get()) <= 0) { // SSL_CTX_use_PrivateKey takes its own reference
+ return false;
+ }
+ return true;
+ }
+
+ static TSslHolder<SSL_CTX> CreateServerContext(const TString& pem) {
+ TSslHolder<SSL_CTX> ctx = CreateSslCtx(SSLv23_server_method());
+ SSL_CTX_set_ecdh_auto(ctx.Get(), 1);
+ if (!LoadX509Chain(ctx, pem)) {
+ return nullptr;
+ }
+ if (!LoadPrivateKey(ctx, pem)) {
+ return nullptr;
+ }
+ return ctx;
+ }
+
+ static TSslHolder<SSL> ConstructSsl(SSL_CTX* ctx, BIO* bio) {
+ TSslHolder<SSL> ssl(SSL_new(ctx));
+
+ if (ssl) {
+ BIO_up_ref(bio); // SSL_set_bio consumes only one reference if rbio and wbio are the same
+ SSL_set_bio(ssl.Get(), bio, bio);
+ }
+
+ return ssl;
+ }
+};
+
+}
diff --git a/library/cpp/actors/http/http_static.cpp b/library/cpp/actors/http/http_static.cpp
new file mode 100644
index 0000000000..c075c5f693
--- /dev/null
+++ b/library/cpp/actors/http/http_static.cpp
@@ -0,0 +1,95 @@
+#include "http_proxy.h"
+#include "http_static.h"
+#include <library/cpp/actors/core/executor_pool_basic.h>
+#include <library/cpp/actors/core/log.h>
+#include <library/cpp/actors/core/scheduler_basic.h>
+#include <library/cpp/actors/http/http.h>
+#include <library/cpp/resource/resource.h>
+#include <util/folder/path.h>
+#include <util/stream/file.h>
+
+namespace NHttp {
+
+class THttpStaticContentHandler : public NActors::TActor<THttpStaticContentHandler> {
+public:
+ using TBase = NActors::TActor<THttpStaticContentHandler>;
+ const TFsPath URL;
+ const TFsPath FilePath;
+ const TFsPath ResourcePath;
+ const TFsPath Index;
+
+ THttpStaticContentHandler(const TString& url, const TString& filePath, const TString& resourcePath, const TString& index)
+ : TBase(&THttpStaticContentHandler::StateWork)
+ , URL(url)
+ , FilePath(filePath)
+ , ResourcePath(resourcePath)
+ , Index(index)
+ {}
+
+ static TInstant GetCompileTime() {
+ tm compileTime = {}; // zero-initialize: strptime only fills the fields it parses
+ strptime(__DATE__ " " __TIME__, "%B %d %Y %H:%M:%S", &compileTime);
+ return TInstant::Seconds(mktime(&compileTime));
+ }
+
+ void Handle(NHttp::TEvHttpProxy::TEvHttpIncomingRequest::TPtr event, const NActors::TActorContext& ctx) {
+ THttpOutgoingResponsePtr response;
+ if (event->Get()->Request->Method != "GET") {
+ response = event->Get()->Request->CreateResponseBadRequest("Wrong request");
+ ctx.Send(event->Sender, new NHttp::TEvHttpProxy::TEvHttpOutgoingResponse(response));
+ return;
+ }
+ TFsPath url(event->Get()->Request->URL.Before('?'));
+ if (!url.IsAbsolute()) {
+ response = event->Get()->Request->CreateResponseBadRequest("Completely wrong URL");
+ ctx.Send(event->Sender, new NHttp::TEvHttpProxy::TEvHttpOutgoingResponse(response));
+ return;
+ }
+ if (url.GetPath().EndsWith('/') && Index.IsDefined()) {
+ url /= Index;
+ }
+ url = url.RelativeTo(URL);
+ try {
+ // TODO: caching?
+ TString contentType = mimetypeByExt(url.GetExtension().c_str());
+ TString data;
+ TFileStat filestat;
+ TFsPath resourcename(ResourcePath / url);
+ if (NResource::FindExact(resourcename.GetPath(), &data)) {
+ static TInstant compileTime(GetCompileTime());
+ filestat.MTime = compileTime.Seconds();
+ } else {
+ TFsPath filename(FilePath / url);
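+ // reject URLs that escape the static root (directory traversal)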
+ if (!filename.IsSubpathOf(FilePath) && filename != FilePath) {
+ response = event->Get()->Request->CreateResponseBadRequest("Wrong URL");
+ ctx.Send(event->Sender, new NHttp::TEvHttpProxy::TEvHttpOutgoingResponse(response));
+ return;
+ }
+ if (filename.Stat(filestat) && filestat.IsFile()) {
+ data = TUnbufferedFileInput(filename).ReadAll();
+ }
+ }
+ if (!filestat.IsNull()) {
+ response = event->Get()->Request->CreateResponseOK(data, contentType, TInstant::Seconds(filestat.MTime));
+ } else {
+ response = event->Get()->Request->CreateResponseNotFound("File not found");
+ }
+ }
+ catch (const yexception&) {
+ response = event->Get()->Request->CreateResponseServiceUnavailable("Not available");
+ }
+ ctx.Send(event->Sender, new NHttp::TEvHttpProxy::TEvHttpOutgoingResponse(response));
+ }
+
+ STFUNC(StateWork) {
+ switch (ev->GetTypeRewrite()) {
+ HFunc(NHttp::TEvHttpProxy::TEvHttpIncomingRequest, Handle);
+ }
+ }
+};
+
+NActors::IActor* CreateHttpStaticContentHandler(const TString& url, const TString& filePath, const TString& resourcePath, const TString& index) {
+ return new THttpStaticContentHandler(url, filePath, resourcePath, index);
+}
+
+}
diff --git a/library/cpp/actors/http/http_static.h b/library/cpp/actors/http/http_static.h
new file mode 100644
index 0000000000..f91e15dfb1
--- /dev/null
+++ b/library/cpp/actors/http/http_static.h
@@ -0,0 +1,9 @@
+#pragma once
+#include <library/cpp/actors/core/actor.h>
+#include "http.h"
+
+namespace NHttp {
+
+NActors::IActor* CreateHttpStaticContentHandler(const TString& url, const TString& filePath, const TString& resourcePath, const TString& index = TString());
+
+}
diff --git a/library/cpp/actors/http/http_ut.cpp b/library/cpp/actors/http/http_ut.cpp
new file mode 100644
index 0000000000..4c922f8d0f
--- /dev/null
+++ b/library/cpp/actors/http/http_ut.cpp
@@ -0,0 +1,358 @@
+#include <library/cpp/testing/unittest/registar.h>
+#include <library/cpp/testing/unittest/tests_data.h>
+#include <library/cpp/actors/core/executor_pool_basic.h>
+#include <library/cpp/actors/core/scheduler_basic.h>
+#include <library/cpp/actors/testlib/test_runtime.h>
+#include <util/system/tempfile.h>
+#include "http.h"
+#include "http_proxy.h"
+
+enum EService : NActors::NLog::EComponent {
+ MIN,
+ Logger,
+ MVP,
+ MAX
+};
+
+namespace {
+
+template <typename HttpType>
+void EatWholeString(TIntrusivePtr<HttpType>& request, const TString& data) {
+ request->EnsureEnoughSpaceAvailable(data.size());
+ auto size = std::min(request->Avail(), data.size());
+ memcpy(request->Pos(), data.data(), size);
+ request->Advance(size);
+}
+
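+// feeds the parser one byte at a time to exercise incremental parsing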
+template <typename HttpType>
+void EatPartialString(TIntrusivePtr<HttpType>& request, const TString& data) {
+ for (char c : data) {
+ request->EnsureEnoughSpaceAvailable(1);
+ memcpy(request->Pos(), &c, 1);
+ request->Advance(1);
+ }
+}
+
+}
+
+Y_UNIT_TEST_SUITE(HttpProxy) {
+ Y_UNIT_TEST(BasicParsing) {
+ NHttp::THttpIncomingRequestPtr request = new NHttp::THttpIncomingRequest();
+ EatWholeString(request, "GET /test HTTP/1.1\r\nHost: test\r\nSome-Header: 32344\r\n\r\n");
+ UNIT_ASSERT_EQUAL(request->Stage, NHttp::THttpIncomingRequest::EParseStage::Done);
+ UNIT_ASSERT_EQUAL(request->Method, "GET");
+ UNIT_ASSERT_EQUAL(request->URL, "/test");
+ UNIT_ASSERT_EQUAL(request->Protocol, "HTTP");
+ UNIT_ASSERT_EQUAL(request->Version, "1.1");
+ UNIT_ASSERT_EQUAL(request->Host, "test");
+ UNIT_ASSERT_EQUAL(request->Headers, "Host: test\r\nSome-Header: 32344\r\n\r\n");
+ }
+
+ Y_UNIT_TEST(BasicParsingChunkedBody) {
+ NHttp::THttpOutgoingRequestPtr request = nullptr; //new NHttp::THttpOutgoingRequest();
+ NHttp::THttpIncomingResponsePtr response = new NHttp::THttpIncomingResponse(request);
+ EatWholeString(response, "HTTP/1.1 200 OK\r\nConnection: close\r\nTransfer-Encoding: chunked\r\n\r\n4\r\nthis\r\n4\r\n is \r\n5\r\ntest.\r\n0\r\n\r\n");
+ UNIT_ASSERT_EQUAL(response->Stage, NHttp::THttpIncomingResponse::EParseStage::Done);
+ UNIT_ASSERT_EQUAL(response->Status, "200");
+ UNIT_ASSERT_EQUAL(response->Connection, "close");
+ UNIT_ASSERT_EQUAL(response->Protocol, "HTTP");
+ UNIT_ASSERT_EQUAL(response->Version, "1.1");
+ UNIT_ASSERT_EQUAL(response->TransferEncoding, "chunked");
+ UNIT_ASSERT_EQUAL(response->Body, "this is test.");
+ }
+
+ Y_UNIT_TEST(InvalidParsingChunkedBody) {
+ NHttp::THttpOutgoingRequestPtr request = nullptr; //new NHttp::THttpOutgoingRequest();
+ NHttp::THttpIncomingResponsePtr response = new NHttp::THttpIncomingResponse(request);
+ EatWholeString(response, "HTTP/1.1 200 OK\r\nConnection: close\r\nTransfer-Encoding: chunked\r\n\r\n5\r\nthis\r\n4\r\n is \r\n5\r\ntest.\r\n0\r\n\r\n");
+ UNIT_ASSERT(response->IsError());
+ }
+
+ Y_UNIT_TEST(AdvancedParsingChunkedBody) {
+ NHttp::THttpOutgoingRequestPtr request = nullptr; //new NHttp::THttpOutgoingRequest();
+ NHttp::THttpIncomingResponsePtr response = new NHttp::THttpIncomingResponse(request);
+ EatWholeString(response, "HTTP/1.1 200 OK\r\nConnection: close\r\nTransfer-Encoding: chunked\r\n\r\n6\r\nthis\r\n\r\n4\r\n is \r\n5\r\ntest.\r\n0\r\n\r\n");
+ UNIT_ASSERT_EQUAL(response->Stage, NHttp::THttpIncomingResponse::EParseStage::Done);
+ UNIT_ASSERT_EQUAL(response->Status, "200");
+ UNIT_ASSERT_EQUAL(response->Connection, "close");
+ UNIT_ASSERT_EQUAL(response->Protocol, "HTTP");
+ UNIT_ASSERT_EQUAL(response->Version, "1.1");
+ UNIT_ASSERT_EQUAL(response->TransferEncoding, "chunked");
+ UNIT_ASSERT_EQUAL(response->Body, "this\r\n is test.");
+ }
+
+ Y_UNIT_TEST(CreateResponseWithCompressedBody) {
+ NHttp::THttpIncomingRequestPtr request = nullptr;
+ NHttp::THttpOutgoingResponsePtr response = new NHttp::THttpOutgoingResponse(request, "HTTP", "1.1", "200", "OK");
+ response->Set<&NHttp::THttpResponse::ContentEncoding>("gzip");
+ TString compressedBody = "compressed body";
+ response->SetBody(compressedBody);
+ UNIT_ASSERT_VALUES_EQUAL("gzip", response->ContentEncoding);
+ UNIT_ASSERT_VALUES_EQUAL(ToString(compressedBody.size()), response->ContentLength);
+ UNIT_ASSERT_VALUES_EQUAL(compressedBody, response->Body);
+ }
+
+ Y_UNIT_TEST(BasicPartialParsing) {
+ NHttp::THttpIncomingRequestPtr request = new NHttp::THttpIncomingRequest();
+ EatPartialString(request, "GET /test HTTP/1.1\r\nHost: test\r\nSome-Header: 32344\r\n\r\n");
+ UNIT_ASSERT_EQUAL(request->Stage, NHttp::THttpIncomingRequest::EParseStage::Done);
+ UNIT_ASSERT_EQUAL(request->Method, "GET");
+ UNIT_ASSERT_EQUAL(request->URL, "/test");
+ UNIT_ASSERT_EQUAL(request->Protocol, "HTTP");
+ UNIT_ASSERT_EQUAL(request->Version, "1.1");
+ UNIT_ASSERT_EQUAL(request->Host, "test");
+ UNIT_ASSERT_EQUAL(request->Headers, "Host: test\r\nSome-Header: 32344\r\n\r\n");
+ }
+
+ Y_UNIT_TEST(BasicPartialParsingChunkedBody) {
+ NHttp::THttpOutgoingRequestPtr request = nullptr; //new NHttp::THttpOutgoingRequest();
+ NHttp::THttpIncomingResponsePtr response = new NHttp::THttpIncomingResponse(request);
+ EatPartialString(response, "HTTP/1.1 200 OK\r\nConnection: close\r\nTransfer-Encoding: chunked\r\n\r\n4\r\nthis\r\n4\r\n is \r\n5\r\ntest.\r\n0\r\n\r\n");
+ UNIT_ASSERT_EQUAL(response->Stage, NHttp::THttpIncomingResponse::EParseStage::Done);
+ UNIT_ASSERT_EQUAL(response->Status, "200");
+ UNIT_ASSERT_EQUAL(response->Connection, "close");
+ UNIT_ASSERT_EQUAL(response->Protocol, "HTTP");
+ UNIT_ASSERT_EQUAL(response->Version, "1.1");
+ UNIT_ASSERT_EQUAL(response->TransferEncoding, "chunked");
+ UNIT_ASSERT_EQUAL(response->Body, "this is test.");
+ }
+
+ Y_UNIT_TEST(AdvancedParsing) {
+ NHttp::THttpIncomingRequestPtr request = new NHttp::THttpIncomingRequest();
+ EatWholeString(request, "GE");
+ EatWholeString(request, "T");
+ EatWholeString(request, " ");
+ EatWholeString(request, "/test");
+ EatWholeString(request, " HTTP/1.1\r");
+ EatWholeString(request, "\nHo");
+ EatWholeString(request, "st: test");
+ EatWholeString(request, "\r\n");
+ EatWholeString(request, "Some-Header: 32344\r\n\r");
+ EatWholeString(request, "\n");
+ UNIT_ASSERT_EQUAL(request->Stage, NHttp::THttpIncomingRequest::EParseStage::Done);
+ UNIT_ASSERT_EQUAL(request->Method, "GET");
+ UNIT_ASSERT_EQUAL(request->URL, "/test");
+ UNIT_ASSERT_EQUAL(request->Protocol, "HTTP");
+ UNIT_ASSERT_EQUAL(request->Version, "1.1");
+ UNIT_ASSERT_EQUAL(request->Host, "test");
+ UNIT_ASSERT_EQUAL(request->Headers, "Host: test\r\nSome-Header: 32344\r\n\r\n");
+ }
+
+ Y_UNIT_TEST(AdvancedPartialParsing) {
+ NHttp::THttpIncomingRequestPtr request = new NHttp::THttpIncomingRequest();
+ EatPartialString(request, "GE");
+ EatPartialString(request, "T");
+ EatPartialString(request, " ");
+ EatPartialString(request, "/test");
+ EatPartialString(request, " HTTP/1.1\r");
+ EatPartialString(request, "\nHo");
+ EatPartialString(request, "st: test");
+ EatPartialString(request, "\r\n");
+ EatPartialString(request, "Some-Header: 32344\r\n\r");
+ EatPartialString(request, "\n");
+ UNIT_ASSERT_EQUAL(request->Stage, NHttp::THttpIncomingRequest::EParseStage::Done);
+ UNIT_ASSERT_EQUAL(request->Method, "GET");
+ UNIT_ASSERT_EQUAL(request->URL, "/test");
+ UNIT_ASSERT_EQUAL(request->Protocol, "HTTP");
+ UNIT_ASSERT_EQUAL(request->Version, "1.1");
+ UNIT_ASSERT_EQUAL(request->Host, "test");
+ UNIT_ASSERT_EQUAL(request->Headers, "Host: test\r\nSome-Header: 32344\r\n\r\n");
+ }
+
+ Y_UNIT_TEST(BasicRenderBodyWithHeadersAndCookies) {
+ NHttp::THttpOutgoingRequestPtr request = NHttp::THttpOutgoingRequest::CreateRequestGet("http://www.yandex.ru/data/url");
+ NHttp::THeadersBuilder headers;
+ NHttp::TCookiesBuilder cookies;
+ cookies.Set("cookie1", "123456");
+ cookies.Set("cookie2", "45678");
+ headers.Set("Cookie", cookies.Render());
+ request->Set(headers);
+ TString requestData;
+ request->AsString(requestData);
+ UNIT_ASSERT_VALUES_EQUAL(requestData, "GET /data/url HTTP/1.1\r\nHost: www.yandex.ru\r\nAccept: */*\r\nCookie: cookie1=123456; cookie2=45678;\r\n");
+ }
+
+ Y_UNIT_TEST(BasicRunning) {
+ NActors::TTestActorRuntimeBase actorSystem;
+ TPortManager portManager;
+ TIpPort port = portManager.GetTcpPort();
+ TAutoPtr<NActors::IEventHandle> handle;
+ actorSystem.Initialize();
+ NMonitoring::TMetricRegistry sensors;
+
+ NActors::IActor* proxy = NHttp::CreateHttpProxy(sensors);
+ NActors::TActorId proxyId = actorSystem.Register(proxy);
+ actorSystem.Send(new NActors::IEventHandle(proxyId, TActorId(), new NHttp::TEvHttpProxy::TEvAddListeningPort(port)), 0, true);
+ actorSystem.DispatchEvents();
+
+ NActors::TActorId serverId = actorSystem.AllocateEdgeActor();
+ actorSystem.Send(new NActors::IEventHandle(proxyId, serverId, new NHttp::TEvHttpProxy::TEvRegisterHandler("/test", serverId)), 0, true);
+
+ NActors::TActorId clientId = actorSystem.AllocateEdgeActor();
+ NHttp::THttpOutgoingRequestPtr httpRequest = NHttp::THttpOutgoingRequest::CreateRequestGet("http://[::1]:" + ToString(port) + "/test");
+ actorSystem.Send(new NActors::IEventHandle(proxyId, clientId, new NHttp::TEvHttpProxy::TEvHttpOutgoingRequest(httpRequest)), 0, true);
+
+ NHttp::TEvHttpProxy::TEvHttpIncomingRequest* request = actorSystem.GrabEdgeEvent<NHttp::TEvHttpProxy::TEvHttpIncomingRequest>(handle);
+
+ UNIT_ASSERT_EQUAL(request->Request->URL, "/test");
+
+ NHttp::THttpOutgoingResponsePtr httpResponse = request->Request->CreateResponseString("HTTP/1.1 200 Found\r\nConnection: Close\r\nTransfer-Encoding: chunked\r\n\r\n6\r\npassed\r\n0\r\n\r\n");
+ actorSystem.Send(new NActors::IEventHandle(handle->Sender, serverId, new NHttp::TEvHttpProxy::TEvHttpOutgoingResponse(httpResponse)), 0, true);
+
+ NHttp::TEvHttpProxy::TEvHttpIncomingResponse* response = actorSystem.GrabEdgeEvent<NHttp::TEvHttpProxy::TEvHttpIncomingResponse>(handle);
+
+ UNIT_ASSERT_EQUAL(response->Response->Status, "200");
+ UNIT_ASSERT_EQUAL(response->Response->Body, "passed");
+ }
+
+ Y_UNIT_TEST(TlsRunning) {
+ NActors::TTestActorRuntimeBase actorSystem;
+ TPortManager portManager;
+ TIpPort port = portManager.GetTcpPort();
+ TAutoPtr<NActors::IEventHandle> handle;
+ actorSystem.Initialize();
+ NMonitoring::TMetricRegistry sensors;
+
+ TString certificateContent = R"___(-----BEGIN PRIVATE KEY-----
+MIIEvwIBADANBgkqhkiG9w0BAQEFAASCBKkwggSlAgEAAoIBAQCzRZjodO7Aqe1w
+RyOj6kG6g2nn8ZGAxfao4mLT0jDTbVksrhV/h2s3uldLkFo5WrNQ8WZe+iIbXeFL
+s8tO6hslzreo9sih2IHoRcH5KnS/6YTqVhRTJb1jE2dM8NwYbwTi+T2Pe0FrBPjI
+kgVO50gAtYl9C+fc715uZiSKW+rRlP5OoFTwxrOjiU27RPZjFYyWK9wTI1Es9uRr
+lbZbLl5cY6dK2J1AViRraaYKCWO26VbOPWLsY4OD3e+ZXIc3OMCz6Yb0wmRPeJ60
+bbbkGfI8O27kDdv69MAWHIm0yYMzKEnom1dce7rNQNDEqJfocsYIsg+EvayT1yQ9
+KTBegw7LAgMBAAECggEBAKaOCrotqYQmXArsjRhFFDwMy+BKdzyEr93INrlFl0dX
+WHpCYobRcbOc1G3H94tB0UdqgAnNqtJyLlb+++ydZAuEOu4oGc8EL+10ofq0jzOd
+6Xct8kQt0/6wkFDTlii9PHUDy0X65ZRgUiNGRtg/2I2QG+SpowmI+trm2xwQueFs
+VaWrjc3cVvXx0b8Lu7hqZUv08kgC38stzuRk/n2T5VWSAr7Z4ZWQbO918Dv35HUw
+Wy/0jNUFP9CBCvFJ4l0OoH9nYhWFG+HXWzNdw6/Hca4jciRKo6esCiOZ9uWYv/ec
+/NvX9rgFg8G8/SrTisX10+Bbeq+R1RKwq/IG409TH4ECgYEA14L+3QsgNIUMeYAx
+jSCyk22R/tOHI1BM+GtKPUhnwHlAssrcPcxXMJovl6WL93VauYjym0wpCz9urSpA
+I2CqTsG8GYciA6Dr3mHgD6cK0jj9UPAU6EnZ5S0mjhPqKZqutu9QegzD2uESvuN8
+36xezwQthzAf0nI/P3sJGjVXjikCgYEA1POm5xcV6SmM6HnIdadEebhzZIJ9TXQz
+ry3Jj3a7CKyD5C7fAdkHUTCjgT/2ElxPi9ABkZnC+d/cW9GtJFa0II5qO/agm3KQ
+ZXYiutu9A7xACHYFXRiJEjVUdGG9dKMVOHUEa8IHEgrrcUVM/suy/GgutywIfaXs
+y58IFP24K9MCgYEAk6zjz7wL+XEiNy+sxLQfKf7vB9sSwxQHakK6wHuY/L8Zomp3
+uLEJHfjJm/SIkK0N2g0JkXkCtv5kbKyC/rsCeK0wo52BpVLjzaLr0k34kE0U6B1b
+dkEE2pGx1bG3x4KDLj+Wuct9ecK5Aa0IqIyI+vo16GkFpUM8K9e3SQo8UOECgYEA
+sCZYAkILYtJ293p9giz5rIISGasDAUXE1vxWBXEeJ3+kneTTnZCrx9Im/ewtnWR0
+fF90XL9HFDDD88POqAd8eo2zfKR2l/89SGBfPBg2EtfuU9FkgGyiPciVcqvC7q9U
+B15saMKX3KnhtdGwbfeLt9RqCCTJZT4SUSDcq5hwdvcCgYAxY4Be8mNipj8Cgg22
+mVWSolA0TEzbtUcNk6iGodpi+Z0LKpsPC0YRqPRyh1K+rIltG1BVdmUBHcMlOYxl
+lWWvbJH6PkJWy4n2MF7PO45kjN3pPZg4hgH63JjZeAineBwEArUGb9zHnvzcdRvF
+wuQ2pZHL/HJ0laUSieHDJ5917w==
+-----END PRIVATE KEY-----
+
+
+-----BEGIN CERTIFICATE-----
+MIIDjTCCAnWgAwIBAgIURt5IBx0J3xgEaQvmyrFH2A+NkpMwDQYJKoZIhvcNAQEL
+BQAwVjELMAkGA1UEBhMCUlUxDzANBgNVBAgMBk1vc2NvdzEPMA0GA1UEBwwGTW9z
+Y293MQ8wDQYDVQQKDAZZYW5kZXgxFDASBgNVBAMMC3Rlc3Qtc2VydmVyMB4XDTE5
+MDkyMDE3MTQ0MVoXDTQ3MDIwNDE3MTQ0MVowVjELMAkGA1UEBhMCUlUxDzANBgNV
+BAgMBk1vc2NvdzEPMA0GA1UEBwwGTW9zY293MQ8wDQYDVQQKDAZZYW5kZXgxFDAS
+BgNVBAMMC3Rlc3Qtc2VydmVyMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKC
+AQEAs0WY6HTuwKntcEcjo+pBuoNp5/GRgMX2qOJi09Iw021ZLK4Vf4drN7pXS5Ba
+OVqzUPFmXvoiG13hS7PLTuobJc63qPbIodiB6EXB+Sp0v+mE6lYUUyW9YxNnTPDc
+GG8E4vk9j3tBawT4yJIFTudIALWJfQvn3O9ebmYkilvq0ZT+TqBU8Mazo4lNu0T2
+YxWMlivcEyNRLPbka5W2Wy5eXGOnStidQFYka2mmCgljtulWzj1i7GODg93vmVyH
+NzjAs+mG9MJkT3ietG225BnyPDtu5A3b+vTAFhyJtMmDMyhJ6JtXXHu6zUDQxKiX
+6HLGCLIPhL2sk9ckPSkwXoMOywIDAQABo1MwUTAdBgNVHQ4EFgQUDv/xuJ4CvCgG
+fPrZP3hRAt2+/LwwHwYDVR0jBBgwFoAUDv/xuJ4CvCgGfPrZP3hRAt2+/LwwDwYD
+VR0TAQH/BAUwAwEB/zANBgkqhkiG9w0BAQsFAAOCAQEAinKpMYaA2tjLpAnPVbjy
+/ZxSBhhB26RiQp3Re8XOKyhTWqgYE6kldYT0aXgK9x9mPC5obQannDDYxDc7lX+/
+qP/u1X81ZcDRo/f+qQ3iHfT6Ftt/4O3qLnt45MFM6Q7WabRm82x3KjZTqpF3QUdy
+tumWiuAP5DMd1IRDtnKjFHO721OsEsf6NLcqdX89bGeqXDvrkwg3/PNwTyW5E7cj
+feY8L2eWtg6AJUnIBu11wvfzkLiH3QKzHvO/SIZTGf5ihDsJ3aKEE9UNauTL3bVc
+CRA/5XcX13GJwHHj6LCoc3sL7mt8qV9HKY2AOZ88mpObzISZxgPpdKCfjsrdm63V
+6g==
+-----END CERTIFICATE-----)___";
+
+ TTempFileHandle certificateFile;
+
+ certificateFile.Write(certificateContent.data(), certificateContent.size());
+
+ NActors::IActor* proxy = NHttp::CreateHttpProxy(sensors);
+ NActors::TActorId proxyId = actorSystem.Register(proxy);
+
+ THolder<NHttp::TEvHttpProxy::TEvAddListeningPort> add = MakeHolder<NHttp::TEvHttpProxy::TEvAddListeningPort>(port);
+ ///////// https configuration
+ add->Secure = true;
+ add->CertificateFile = certificateFile.Name();
+ add->PrivateKeyFile = certificateFile.Name();
+ /////////
+ actorSystem.Send(new NActors::IEventHandle(proxyId, TActorId(), add.Release()), 0, true);
+ actorSystem.DispatchEvents();
+
+ NActors::TActorId serverId = actorSystem.AllocateEdgeActor();
+ actorSystem.Send(new NActors::IEventHandle(proxyId, serverId, new NHttp::TEvHttpProxy::TEvRegisterHandler("/test", serverId)), 0, true);
+
+ NActors::TActorId clientId = actorSystem.AllocateEdgeActor();
+ NHttp::THttpOutgoingRequestPtr httpRequest = NHttp::THttpOutgoingRequest::CreateRequestGet("https://[::1]:" + ToString(port) + "/test");
+ actorSystem.Send(new NActors::IEventHandle(proxyId, clientId, new NHttp::TEvHttpProxy::TEvHttpOutgoingRequest(httpRequest)), 0, true);
+
+ NHttp::TEvHttpProxy::TEvHttpIncomingRequest* request = actorSystem.GrabEdgeEvent<NHttp::TEvHttpProxy::TEvHttpIncomingRequest>(handle);
+
+ UNIT_ASSERT_EQUAL(request->Request->URL, "/test");
+
+ NHttp::THttpOutgoingResponsePtr httpResponse = request->Request->CreateResponseString("HTTP/1.1 200 Found\r\nConnection: Close\r\nTransfer-Encoding: chunked\r\n\r\n6\r\npassed\r\n0\r\n\r\n");
+ actorSystem.Send(new NActors::IEventHandle(handle->Sender, serverId, new NHttp::TEvHttpProxy::TEvHttpOutgoingResponse(httpResponse)), 0, true);
+
+ NHttp::TEvHttpProxy::TEvHttpIncomingResponse* response = actorSystem.GrabEdgeEvent<NHttp::TEvHttpProxy::TEvHttpIncomingResponse>(handle);
+
+ UNIT_ASSERT_EQUAL(response->Response->Status, "200");
+ UNIT_ASSERT_EQUAL(response->Response->Body, "passed");
+ }
+
+ /*Y_UNIT_TEST(AdvancedRunning) {
+ THolder<NActors::TActorSystemSetup> setup = MakeHolder<NActors::TActorSystemSetup>();
+ setup->NodeId = 1;
+ setup->ExecutorsCount = 1;
+ setup->Executors = new TAutoPtr<NActors::IExecutorPool>[1];
+ setup->Executors[0] = new NActors::TBasicExecutorPool(0, 2, 10);
+ setup->Scheduler = new NActors::TBasicSchedulerThread(NActors::TSchedulerConfig(512, 100));
+ NActors::TActorSystem actorSystem(setup);
+ actorSystem.Start();
+ NHttp::THttpProxy* incomingProxy = new NHttp::THttpProxy();
+ NActors::TActorId incomingProxyId = actorSystem.Register(incomingProxy);
+ actorSystem.Send(incomingProxyId, new NHttp::TEvHttpProxy::TEvAddListeningPort(13337));
+
+ NHttp::THttpProxy* outgoingProxy = new NHttp::THttpProxy();
+ NActors::TActorId outgoingProxyId = actorSystem.Register(outgoingProxy);
+
+ THolder<NHttp::THttpStaticStringRequest> httpRequest = MakeHolder<NHttp::THttpStaticStringRequest>("GET /test HTTP/1.1\r\n\r\n");
+ actorSystem.Send(outgoingProxyId, new NHttp::TEvHttpProxy::TEvHttpOutgoingRequest("[::]:13337", std::move(httpRequest)));
+
+ Sleep(TDuration::Minutes(60));
+ }*/
+
+ Y_UNIT_TEST(TooLongHeader) {
+ NActors::TTestActorRuntimeBase actorSystem;
+ TPortManager portManager;
+ TIpPort port = portManager.GetTcpPort();
+ TAutoPtr<NActors::IEventHandle> handle;
+ actorSystem.Initialize();
+ NMonitoring::TMetricRegistry sensors;
+
+ NActors::IActor* proxy = NHttp::CreateHttpProxy(sensors);
+ NActors::TActorId proxyId = actorSystem.Register(proxy);
+ actorSystem.Send(new NActors::IEventHandle(proxyId, TActorId(), new NHttp::TEvHttpProxy::TEvAddListeningPort(port)), 0, true);
+ actorSystem.DispatchEvents();
+
+ NActors::TActorId serverId = actorSystem.AllocateEdgeActor();
+ actorSystem.Send(new NActors::IEventHandle(proxyId, serverId, new NHttp::TEvHttpProxy::TEvRegisterHandler("/test", serverId)), 0, true);
+
+ NActors::TActorId clientId = actorSystem.AllocateEdgeActor();
+ NHttp::THttpOutgoingRequestPtr httpRequest = NHttp::THttpOutgoingRequest::CreateRequestGet("http://[::1]:" + ToString(port) + "/test");
+ httpRequest->Set("Connection", "close");
+ TString longHeader;
+ longHeader.append(9000, 'X');
+ httpRequest->Set(longHeader, "data");
+ actorSystem.Send(new NActors::IEventHandle(proxyId, clientId, new NHttp::TEvHttpProxy::TEvHttpOutgoingRequest(httpRequest)), 0, true);
+
+ NHttp::TEvHttpProxy::TEvHttpIncomingResponse* response = actorSystem.GrabEdgeEvent<NHttp::TEvHttpProxy::TEvHttpIncomingResponse>(handle);
+
+ UNIT_ASSERT_EQUAL(response->Response->Status, "400");
+ UNIT_ASSERT_EQUAL(response->Response->Body, "Invalid http header");
+ }
+}
diff --git a/library/cpp/actors/http/ut/ya.make b/library/cpp/actors/http/ut/ya.make
new file mode 100644
index 0000000000..8b4c04c4d3
--- /dev/null
+++ b/library/cpp/actors/http/ut/ya.make
@@ -0,0 +1,18 @@
+UNITTEST_FOR(library/cpp/actors/http)
+
+OWNER(xenoxeno)
+
+SIZE(SMALL)
+
+PEERDIR(
+ library/cpp/actors/testlib
+)
+
+IF (NOT OS_WINDOWS)
+SRCS(
+ http_ut.cpp
+)
+ENDIF()
+
+END()
diff --git a/library/cpp/actors/http/ya.make b/library/cpp/actors/http/ya.make
new file mode 100644
index 0000000000..7ce68b7a75
--- /dev/null
+++ b/library/cpp/actors/http/ya.make
@@ -0,0 +1,33 @@
+RECURSE_FOR_TESTS(ut)
+
+LIBRARY()
+
+OWNER(xenoxeno g:kikimr)
+
+SRCS(
+ http_cache.cpp
+ http_cache.h
+ http_config.h
+ http_proxy_acceptor.cpp
+ http_proxy_incoming.cpp
+ http_proxy_outgoing.cpp
+ http_proxy_sock_impl.h
+ http_proxy_ssl.h
+ http_proxy.cpp
+ http_proxy.h
+ http_static.cpp
+ http_static.h
+ http.cpp
+ http.h
+)
+
+PEERDIR(
+ contrib/libs/openssl
+ library/cpp/actors/core
+ library/cpp/actors/interconnect
+ library/cpp/dns
+ library/cpp/monlib/metrics
+ library/cpp/string_utils/quote
+)
+
+END()
diff --git a/library/cpp/actors/interconnect/channel_scheduler.h b/library/cpp/actors/interconnect/channel_scheduler.h
new file mode 100644
index 0000000000..551a4cb61a
--- /dev/null
+++ b/library/cpp/actors/interconnect/channel_scheduler.h
@@ -0,0 +1,120 @@
+#pragma once
+
+#include "interconnect_channel.h"
+#include "event_holder_pool.h"
+
+#include <memory>
+
+namespace NActors {
+
+ class TChannelScheduler {
+ const ui32 PeerNodeId;
+ std::array<std::optional<TEventOutputChannel>, 16> ChannelArray;
+ THashMap<ui16, TEventOutputChannel> ChannelMap;
+ std::shared_ptr<IInterconnectMetrics> Metrics;
+ TEventHolderPool& Pool;
+ const ui32 MaxSerializedEventSize;
+ const TSessionParams Params;
+
+ struct THeapItem {
+ TEventOutputChannel *Channel;
+ ui64 WeightConsumed = 0;
+
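+ // inverted comparison makes the std::push_heap/pop_heap max-heap behave
+ // as a min-heap on WeightConsumed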
+ friend bool operator <(const THeapItem& x, const THeapItem& y) {
+ return x.WeightConsumed > y.WeightConsumed;
+ }
+ };
+
+ std::vector<THeapItem> Heap;
+
+ public:
+ TChannelScheduler(ui32 peerNodeId, const TChannelsConfig& predefinedChannels,
+ std::shared_ptr<IInterconnectMetrics> metrics, TEventHolderPool& pool, ui32 maxSerializedEventSize,
+ TSessionParams params)
+ : PeerNodeId(peerNodeId)
+ , Metrics(std::move(metrics))
+ , Pool(pool)
+ , MaxSerializedEventSize(maxSerializedEventSize)
+ , Params(std::move(params))
+ {
+ for (const auto& item : predefinedChannels) {
+ GetOutputChannel(item.first);
+ }
+ }
+
+ TEventOutputChannel *PickChannelWithLeastConsumedWeight() {
+ Y_VERIFY(!Heap.empty());
+ return Heap.front().Channel;
+ }
+
+ void AddToHeap(TEventOutputChannel& channel, ui64 counter) {
+ if (channel.IsWorking()) {
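+ // restore the weight the channel had when it was paused, discounted by
+ // the equalization that happened while it was out of the heap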
+ ui64 weight = channel.WeightConsumedOnPause;
+ weight -= Min(weight, counter - channel.EqualizeCounterOnPause);
+ Heap.push_back(THeapItem{&channel, weight});
+ std::push_heap(Heap.begin(), Heap.end());
+ }
+ }
+
+ void FinishPick(ui64 weightConsumed, ui64 counter) {
+ std::pop_heap(Heap.begin(), Heap.end());
+ auto& item = Heap.back();
+ item.WeightConsumed += weightConsumed;
+ if (item.Channel->IsWorking()) { // reschedule
+ std::push_heap(Heap.begin(), Heap.end());
+ } else { // remove from heap
+ item.Channel->EqualizeCounterOnPause = counter;
+ item.Channel->WeightConsumedOnPause = item.WeightConsumed;
+ Heap.pop_back();
+ }
+ }
+
+ TEventOutputChannel& GetOutputChannel(ui16 channel) {
+ if (channel < ChannelArray.size()) {
+ auto& res = ChannelArray[channel];
+ if (Y_UNLIKELY(!res)) {
+ res.emplace(Pool, channel, PeerNodeId, MaxSerializedEventSize, Metrics,
+ Params);
+ }
+ return *res;
+ } else {
+ auto it = ChannelMap.find(channel);
+ if (Y_UNLIKELY(it == ChannelMap.end())) {
+ it = ChannelMap.emplace(std::piecewise_construct, std::forward_as_tuple(channel),
+ std::forward_as_tuple(Pool, channel, PeerNodeId, MaxSerializedEventSize,
+ Metrics, Params)).first;
+ }
+ return it->second;
+ }
+ }
+
+ ui64 Equalize() {
+ if (Heap.empty()) {
+ return 0; // nothing to do here -- no working channels
+ }
+
+ // find the minimum consumed weight among working channels and then adjust weights
+ ui64 min = Max<ui64>();
+ for (THeapItem& item : Heap) {
+ min = Min(min, item.WeightConsumed);
+ }
+ for (THeapItem& item : Heap) {
+ item.WeightConsumed -= min;
+ }
+ return min;
+ }
+
+ template<typename TCallback>
+ void ForEach(TCallback&& callback) {
+ for (auto& channel : ChannelArray) {
+ if (channel) {
+ callback(*channel);
+ }
+ }
+ for (auto& [id, channel] : ChannelMap) {
+ callback(channel);
+ }
+ }
+ };
+
+} // NActors
diff --git a/library/cpp/actors/interconnect/event_filter.h b/library/cpp/actors/interconnect/event_filter.h
new file mode 100644
index 0000000000..47dabf5f16
--- /dev/null
+++ b/library/cpp/actors/interconnect/event_filter.h
@@ -0,0 +1,72 @@
+#pragma once
+
+#include <library/cpp/actors/core/event.h>
+
+namespace NActors {
+
+ enum class ENodeClass {
+ SYSTEM,
+ LOCAL_TENANT,
+ PEER_TENANT,
+ COUNT
+ };
+
+ class TEventFilter : TNonCopyable {
+ using TRouteMask = ui16;
+
+ TVector<TVector<TRouteMask>> ScopeRoutes;
+
+ public:
+ TEventFilter()
+ : ScopeRoutes(65536)
+ {}
+
+ void RegisterEvent(ui32 type, TRouteMask routes) {
+ auto& evSpaceIndex = ScopeRoutes[type >> 16];
+ const ui16 subtype = type & 65535;
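+ // grow the per-event-space table in 512-entry chunks to limit resizes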
+ size_t size = (subtype + 512) & ~511;
+ if (evSpaceIndex.size() < size) {
+ evSpaceIndex.resize(size);
+ }
+ evSpaceIndex[subtype] = routes;
+ }
+
+ bool CheckIncomingEvent(const IEventHandle& ev, const TScopeId& localScopeId) const {
+ TRouteMask routes = 0;
+ if (const auto& evSpaceIndex = ScopeRoutes[ev.Type >> 16]) {
+ const ui16 subtype = ev.Type & 65535;
+ routes = subtype < evSpaceIndex.size() ? evSpaceIndex[subtype] : 0;
+ } else {
+ routes = ~TRouteMask(); // allow unfilled event spaces by default
+ }
+ return routes & MakeRouteMask(GetNodeClass(ev.OriginScopeId, localScopeId), GetNodeClass(localScopeId, ev.OriginScopeId));
+ }
+
+ static ENodeClass GetNodeClass(const TScopeId& scopeId, const TScopeId& localScopeId) {
+ if (scopeId.first == 0) {
+ // system scope, or null scope
+ return scopeId.second ? ENodeClass::SYSTEM : ENodeClass::COUNT;
+ } else if (scopeId == localScopeId) {
+ return ENodeClass::LOCAL_TENANT;
+ } else {
+ return ENodeClass::PEER_TENANT;
+ }
+ }
+
+ static TRouteMask MakeRouteMask(ENodeClass from, ENodeClass to) {
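+ // each (from, to) node-class pair maps to a single bit of the mask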
+ if (from == ENodeClass::COUNT || to == ENodeClass::COUNT) {
+ return 0;
+ }
+ return 1U << (static_cast<unsigned>(from) * static_cast<unsigned>(ENodeClass::COUNT) + static_cast<unsigned>(to));
+ }
+
+ static TRouteMask MakeRouteMask(std::initializer_list<std::pair<ENodeClass, ENodeClass>> items) {
+ TRouteMask mask = 0;
+ for (const auto& p : items) {
+ mask |= MakeRouteMask(p.first, p.second);
+ }
+ return mask;
+ }
+ };
+
+} // NActors
diff --git a/library/cpp/actors/interconnect/event_holder_pool.h b/library/cpp/actors/interconnect/event_holder_pool.h
new file mode 100644
index 0000000000..b6090a3bc8
--- /dev/null
+++ b/library/cpp/actors/interconnect/event_holder_pool.h
@@ -0,0 +1,128 @@
+#pragma once
+
+#include <library/cpp/containers/stack_vector/stack_vec.h>
+
+#include "packet.h"
+
+namespace NActors {
+ struct TEvFreeItems : TEventLocal<TEvFreeItems, EventSpaceBegin(TEvents::ES_PRIVATE)> {
+ static constexpr size_t MaxEvents = 256;
+
+ TList<TTcpPacketOutTask> Items;
+ std::list<TEventHolder> FreeQueue;
+ TStackVec<THolder<IEventBase>, MaxEvents> Events;
+ TStackVec<THolder<TEventSerializedData>, MaxEvents> Buffers;
+ std::shared_ptr<std::atomic<TAtomicBase>> Counter;
+ ui64 NumBytes = 0;
+
+ ~TEvFreeItems() {
+ if (Counter) {
+ TAtomicBase res = Counter->fetch_sub(NumBytes) - NumBytes;
+ Y_VERIFY(res >= 0);
+ }
+ }
+
+ bool GetInLineForDestruction(const TIntrusivePtr<TInterconnectProxyCommon>& common) {
+ Y_VERIFY(!Counter);
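+ // account these bytes against the shared destructor-queue budget;
+ // if the limit would be exceeded, roll back and destroy inline instead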
+ const auto& counter = common->DestructorQueueSize;
+ const auto& max = common->MaxDestructorQueueSize;
+ if (counter && (TAtomicBase)(counter->fetch_add(NumBytes) + NumBytes) > max) {
+ counter->fetch_sub(NumBytes);
+ return false;
+ }
+ Counter = counter;
+ return true;
+ }
+ };
+
+ class TEventHolderPool {
+ using TDestroyCallback = std::function<void(THolder<IEventBase>)>;
+
+ static constexpr size_t MaxFreeQueueItems = 32;
+ static constexpr size_t FreeQueueTrimThreshold = MaxFreeQueueItems * 2;
+ static constexpr ui64 MaxBytesPerMessage = 10 * 1024 * 1024;
+
+ TIntrusivePtr<TInterconnectProxyCommon> Common;
+ std::list<TEventHolder> Cache;
+ THolder<TEvFreeItems> PendingFreeEvent;
+ TDestroyCallback DestroyCallback;
+
+ public:
+ TEventHolderPool(TIntrusivePtr<TInterconnectProxyCommon> common,
+ TDestroyCallback destroyCallback)
+ : Common(std::move(common))
+ , DestroyCallback(std::move(destroyCallback))
+ {}
+
+ TEventHolder& Allocate(std::list<TEventHolder>& queue) {
+ if (Cache.empty()) {
+ queue.emplace_back();
+ } else {
+ queue.splice(queue.end(), Cache, Cache.begin());
+ }
+ return queue.back();
+ }
+
+ void Release(std::list<TEventHolder>& queue) {
+ for (auto it = queue.begin(); it != queue.end(); ) {
+ Release(queue, it++);
+ }
+ }
+
+ void Release(std::list<TEventHolder>& queue, std::list<TEventHolder>::iterator event) {
+ bool trim = false;
+
+ // release held event, if any
+ if (THolder<IEventBase> ev = std::move(event->Event)) {
+ auto p = GetPendingEvent();
+ p->NumBytes += event->EventSerializedSize;
+ auto& events = p->Events;
+ events.push_back(std::move(ev));
+ trim = trim || events.size() >= TEvFreeItems::MaxEvents || p->NumBytes >= MaxBytesPerMessage;
+ }
+
+ // release buffer, if any
+ if (event->Buffer && event->Buffer.RefCount() == 1) {
+ auto p = GetPendingEvent();
+ p->NumBytes += event->EventSerializedSize;
+ auto& buffers = p->Buffers;
+ buffers.emplace_back(event->Buffer.Release());
+ trim = trim || buffers.size() >= TEvFreeItems::MaxEvents || p->NumBytes >= MaxBytesPerMessage;
+ }
+
+ // free event and trim the cache if its size is exceeded
+ event->Clear();
+ Cache.splice(Cache.end(), queue, event);
+ if (Cache.size() >= FreeQueueTrimThreshold) {
+ auto& freeQueue = GetPendingEvent()->FreeQueue;
+ auto it = Cache.begin();
+ std::advance(it, Cache.size() - MaxFreeQueueItems);
+ freeQueue.splice(freeQueue.end(), Cache, Cache.begin(), it);
+ trim = true;
+ }
+
+ // release items if we have hit the limit
+ if (trim) {
+ Trim();
+ }
+ }
+
+ void Trim() {
+ if (auto ev = std::move(PendingFreeEvent); ev && ev->GetInLineForDestruction(Common)) {
+ DestroyCallback(std::move(ev));
+ }
+
+ // ensure it is dropped
+ PendingFreeEvent.Reset();
+ }
+
+ private:
+ TEvFreeItems* GetPendingEvent() {
+ if (!PendingFreeEvent) {
+ PendingFreeEvent.Reset(new TEvFreeItems);
+ }
+ return PendingFreeEvent.Get();
+ }
+ };
+
+}
diff --git a/library/cpp/actors/interconnect/events_local.h b/library/cpp/actors/interconnect/events_local.h
new file mode 100644
index 0000000000..8a46ffd535
--- /dev/null
+++ b/library/cpp/actors/interconnect/events_local.h
@@ -0,0 +1,403 @@
+#pragma once
+
+#include <library/cpp/actors/core/events.h>
+#include <library/cpp/actors/core/event_local.h>
+#include <library/cpp/actors/protos/interconnect.pb.h>
+#include <util/generic/deque.h>
+#include <util/network/address.h>
+
+#include "interconnect_stream.h"
+#include "packet.h"
+#include "types.h"
+
+namespace NActors {
+ struct TProgramInfo {
+ ui64 PID = 0;
+ ui64 StartTime = 0;
+ ui64 Serial = 0;
+ };
+
+ enum class ENetwork : ui32 {
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // local messages
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ Start = EventSpaceBegin(TEvents::ES_INTERCONNECT_TCP),
+
+ SocketReadyRead = Start,
+ SocketReadyWrite,
+ SocketError,
+ Connect,
+ Disconnect,
+ IncomingConnection,
+ HandshakeAsk,
+ HandshakeAck,
+ HandshakeNak,
+ HandshakeDone,
+ HandshakeFail,
+ Kick,
+ Flush,
+ NodeInfo,
+ BunchOfEventsToDestroy,
+ HandshakeRequest,
+ HandshakeReplyOK,
+ HandshakeReplyError,
+ ResolveAddress,
+ AddressInfo,
+ ResolveError,
+ HTTPStreamStatus,
+ HTTPSendContent,
+ ConnectProtocolWakeup,
+ HTTPProtocolRetry,
+ EvPollerRegister,
+ EvPollerRegisterResult,
+ EvPollerReady,
+ EvUpdateFromInputSession,
+ EvConfirmUpdate,
+ EvSessionBufferSizeRequest,
+ EvSessionBufferSizeResponse,
+ EvProcessPingRequest,
+ EvGetSecureSocket,
+ EvSecureSocket,
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // nonlocal messages; their indices must be preserved in order to work properly while doing rolling update
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ // interconnect load test message
+ EvLoadMessage = Start + 256,
+ };
+
+ struct TEvSocketReadyRead: public TEventLocal<TEvSocketReadyRead, ui32(ENetwork::SocketReadyRead)> {
+ DEFINE_SIMPLE_LOCAL_EVENT(TEvSocketReadyRead, "Network: TEvSocketReadyRead")
+ };
+
+ struct TEvSocketReadyWrite: public TEventLocal<TEvSocketReadyWrite, ui32(ENetwork::SocketReadyWrite)> {
+ DEFINE_SIMPLE_LOCAL_EVENT(TEvSocketReadyWrite, "Network: TEvSocketReadyWrite")
+ };
+
+ struct TEvSocketError: public TEventLocal<TEvSocketError, ui32(ENetwork::SocketError)> {
+ DEFINE_SIMPLE_LOCAL_EVENT(TEvSocketError, ::strerror(Error))
+ TString GetReason() const {
+ return ::strerror(Error);
+ }
+ const int Error;
+ TIntrusivePtr<NInterconnect::TStreamSocket> Socket;
+
+ TEvSocketError(int error, TIntrusivePtr<NInterconnect::TStreamSocket> sock)
+ : Error(error)
+ , Socket(std::move(sock))
+ {
+ }
+ };
+
+ struct TEvSocketConnect: public TEventLocal<TEvSocketConnect, ui32(ENetwork::Connect)> {
+ DEFINE_SIMPLE_LOCAL_EVENT(TEvSocketConnect, "Network: TEvSocketConnect")
+ };
+
+ struct TEvSocketDisconnect: public TEventLocal<TEvSocketDisconnect, ui32(ENetwork::Disconnect)> {
+ DEFINE_SIMPLE_LOCAL_EVENT(TEvSocketDisconnect, "Network: TEvSocketDisconnect")
+ TDisconnectReason Reason;
+
+ TEvSocketDisconnect(TDisconnectReason reason)
+ : Reason(std::move(reason))
+ {
+ }
+ };
+
+ struct TEvHandshakeAsk: public TEventLocal<TEvHandshakeAsk, ui32(ENetwork::HandshakeAsk)> {
+ DEFINE_SIMPLE_LOCAL_EVENT(TEvHandshakeAsk, "Network: TEvHandshakeAsk")
+ TEvHandshakeAsk(const TActorId& self,
+ const TActorId& peer,
+ ui64 counter)
+ : Self(self)
+ , Peer(peer)
+ , Counter(counter)
+ {
+ }
+ const TActorId Self;
+ const TActorId Peer;
+ const ui64 Counter;
+ };
+
+ struct TEvHandshakeAck: public TEventLocal<TEvHandshakeAck, ui32(ENetwork::HandshakeAck)> {
+ DEFINE_SIMPLE_LOCAL_EVENT(TEvHandshakeAck, "Network: TEvHandshakeAck")
+
+ TEvHandshakeAck(const TActorId& self, ui64 nextPacket, TSessionParams params)
+ : Self(self)
+ , NextPacket(nextPacket)
+ , Params(std::move(params))
+ {}
+
+ const TActorId Self;
+ const ui64 NextPacket;
+ const TSessionParams Params;
+ };
+
+ struct TEvHandshakeNak : TEventLocal<TEvHandshakeNak, ui32(ENetwork::HandshakeNak)> {
+ DEFINE_SIMPLE_LOCAL_EVENT(TEvHandshakeNak, "Network: TEvHandshakeNak")
+ };
+
+ struct TEvHandshakeRequest
+ : public TEventLocal<TEvHandshakeRequest,
+ ui32(ENetwork::HandshakeRequest)> {
+ DEFINE_SIMPLE_LOCAL_EVENT(TEvHandshakeRequest,
+ "Network: TEvHandshakeRequest")
+
+ NActorsInterconnect::THandshakeRequest Record;
+ };
+
+ struct TEvHandshakeReplyOK
+ : public TEventLocal<TEvHandshakeReplyOK,
+ ui32(ENetwork::HandshakeReplyOK)> {
+ DEFINE_SIMPLE_LOCAL_EVENT(TEvHandshakeReplyOK,
+ "Network: TEvHandshakeReplyOK")
+
+ NActorsInterconnect::THandshakeReply Record;
+ };
+
+ struct TEvHandshakeReplyError
+ : public TEventLocal<TEvHandshakeReplyError,
+ ui32(ENetwork::HandshakeReplyError)> {
+ DEFINE_SIMPLE_LOCAL_EVENT(TEvHandshakeReplyError,
+ "Network: TEvHandshakeReplyError")
+
+ TEvHandshakeReplyError(TString error) {
+ Record.SetErrorExplaination(error);
+ }
+
+ NActorsInterconnect::THandshakeReply Record;
+ };
+
+ struct TEvIncomingConnection: public TEventLocal<TEvIncomingConnection, ui32(ENetwork::IncomingConnection)> {
+ DEFINE_SIMPLE_LOCAL_EVENT(TEvIncomingConnection, "Network: TEvIncomingConnection")
+ TIntrusivePtr<NInterconnect::TStreamSocket> Socket;
+ NInterconnect::TAddress Address;
+
+ TEvIncomingConnection(TIntrusivePtr<NInterconnect::TStreamSocket> socket, NInterconnect::TAddress address)
+ : Socket(std::move(socket))
+ , Address(std::move(address))
+ {}
+ };
+
+ struct TEvHandshakeDone: public TEventLocal<TEvHandshakeDone, ui32(ENetwork::HandshakeDone)> {
+ DEFINE_SIMPLE_LOCAL_EVENT(TEvHandshakeDone, "Network: TEvHandshakeDone")
+
+ TEvHandshakeDone(
+ TIntrusivePtr<NInterconnect::TStreamSocket> socket,
+ const TActorId& peer,
+ const TActorId& self,
+ ui64 nextPacket,
+ TAutoPtr<TProgramInfo>&& programInfo,
+ TSessionParams params)
+ : Socket(std::move(socket))
+ , Peer(peer)
+ , Self(self)
+ , NextPacket(nextPacket)
+ , ProgramInfo(std::move(programInfo))
+ , Params(std::move(params))
+ {
+ }
+
+ TIntrusivePtr<NInterconnect::TStreamSocket> Socket;
+ const TActorId Peer;
+ const TActorId Self;
+ const ui64 NextPacket;
+ TAutoPtr<TProgramInfo> ProgramInfo;
+ const TSessionParams Params;
+ };
+
+ struct TEvHandshakeFail: public TEventLocal<TEvHandshakeFail, ui32(ENetwork::HandshakeFail)> {
+ DEFINE_SIMPLE_LOCAL_EVENT(TEvHandshakeFail, "Network: TEvHandshakeFail")
+
+ enum EnumHandshakeFail {
+ HANDSHAKE_FAIL_TRANSIENT,
+ HANDSHAKE_FAIL_PERMANENT,
+ HANDSHAKE_FAIL_SESSION_MISMATCH,
+ };
+
+ TEvHandshakeFail(EnumHandshakeFail temporary, TString explanation)
+ : Temporary(temporary)
+ , Explanation(std::move(explanation))
+ {
+ }
+
+ const EnumHandshakeFail Temporary;
+ const TString Explanation;
+ };
+
+ struct TEvKick: public TEventLocal<TEvKick, ui32(ENetwork::Kick)> {
+ DEFINE_SIMPLE_LOCAL_EVENT(TEvKick, "Network: TEvKick")
+ };
+
+ struct TEvFlush: public TEventLocal<TEvFlush, ui32(ENetwork::Flush)> {
+ DEFINE_SIMPLE_LOCAL_EVENT(TEvFlush, "Network: TEvFlush")
+ };
+
+ struct TEvLocalNodeInfo
+ : public TEventLocal<TEvLocalNodeInfo, ui32(ENetwork::NodeInfo)> {
+ DEFINE_SIMPLE_LOCAL_EVENT(TEvLocalNodeInfo, "Network: TEvLocalNodeInfo")
+
+ ui32 NodeId;
+ NAddr::IRemoteAddrPtr Address;
+ };
+
+ struct TEvBunchOfEventsToDestroy : TEventLocal<TEvBunchOfEventsToDestroy, ui32(ENetwork::BunchOfEventsToDestroy)> {
+ DEFINE_SIMPLE_LOCAL_EVENT(TEvBunchOfEventsToDestroy,
+ "Network: TEvBunchOfEventsToDestroy")
+
+ TEvBunchOfEventsToDestroy(TDeque<TAutoPtr<IEventBase>> events)
+ : Events(std::move(events))
+ {
+ }
+
+ TDeque<TAutoPtr<IEventBase>> Events;
+ };
+
+ struct TEvResolveAddress
+ : public TEventLocal<TEvResolveAddress, ui32(ENetwork::ResolveAddress)> {
+ DEFINE_SIMPLE_LOCAL_EVENT(TEvResolveAddress, "Network: TEvResolveAddress")
+
+ TString Address;
+ ui16 Port;
+ };
+
+ struct TEvAddressInfo
+ : public TEventLocal<TEvAddressInfo, ui32(ENetwork::AddressInfo)> {
+ DEFINE_SIMPLE_LOCAL_EVENT(TEvAddressInfo, "Network: TEvAddressInfo")
+
+ NAddr::IRemoteAddrPtr Address;
+ };
+
+ struct TEvResolveError
+ : public TEventLocal<TEvResolveError, ui32(ENetwork::ResolveError)> {
+ DEFINE_SIMPLE_LOCAL_EVENT(TEvResolveError, "Network: TEvResolveError")
+
+ TString Explain;
+ };
+
+ struct TEvHTTPStreamStatus
+ : public TEventLocal<TEvHTTPStreamStatus, ui32(ENetwork::HTTPStreamStatus)> {
+ DEFINE_SIMPLE_LOCAL_EVENT(TEvHTTPStreamStatus,
+ "Network: TEvHTTPStreamStatus")
+ enum EStatus {
+ READY,
+ COMPLETE,
+ ERROR,
+ };
+
+ EStatus Status;
+ TString Error;
+ TString HttpHeaders;
+ };
+
+ struct TEvHTTPSendContent
+ : public TEventLocal<TEvHTTPSendContent, ui32(ENetwork::HTTPSendContent)> {
+ DEFINE_SIMPLE_LOCAL_EVENT(TEvHTTPSendContent, "Network: TEvHTTPSendContent")
+
+ const char* Data;
+ size_t Len;
+ bool Last;
+ };
+
+ struct TEvConnectWakeup
+ : public TEventLocal<TEvConnectWakeup,
+ ui32(ENetwork::ConnectProtocolWakeup)> {
+ DEFINE_SIMPLE_LOCAL_EVENT(TEvConnectWakeup, "Protocols: TEvConnectWakeup")
+ };
+
+ struct TEvHTTPProtocolRetry
+ : public TEventLocal<TEvHTTPProtocolRetry,
+ ui32(ENetwork::HTTPProtocolRetry)> {
+ DEFINE_SIMPLE_LOCAL_EVENT(TEvHTTPProtocolRetry,
+ "Protocols: TEvHTTPProtocolRetry")
+ };
+
+ struct TEvLoadMessage
+ : TEventPB<TEvLoadMessage, NActorsInterconnect::TEvLoadMessage, static_cast<ui32>(ENetwork::EvLoadMessage)> {
+ TEvLoadMessage() = default;
+
+ template <typename TContainer>
+ TEvLoadMessage(const TContainer& route, const TString& id, const TString* payload) {
+ for (const TActorId& actorId : route) {
+ auto* hop = Record.AddHops();
+ if (actorId) {
+ ActorIdToProto(actorId, hop->MutableNextHop());
+ }
+ }
+ Record.SetId(id);
+ if (payload) {
+ Record.SetPayload(*payload);
+ }
+ }
+
+ template <typename TContainer>
+ TEvLoadMessage(const TContainer& route, const TString& id, TRope&& payload) {
+ for (const TActorId& actorId : route) {
+ auto* hop = Record.AddHops();
+ if (actorId) {
+ ActorIdToProto(actorId, hop->MutableNextHop());
+ }
+ }
+ Record.SetId(id);
+ AddPayload(std::move(payload));
+ }
+ };
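+    // Editor's note: a minimal usage sketch, not part of the original change. It assumes an
+    // actor context `ctx` and hypothetical hop actor ids; the message travels the given route
+    // and carries an optional payload, mirroring the constructors above:
+    //
+    //     TVector<TActorId> route{firstHop, secondHop, finalHop};
+    //     TString payload = "ping";
+    //     ctx.Send(route.front(), new TEvLoadMessage(route, "msg-1", &payload));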
+
+ struct TEvUpdateFromInputSession : TEventLocal<TEvUpdateFromInputSession, static_cast<ui32>(ENetwork::EvUpdateFromInputSession)> {
+ ui64 ConfirmedByInput; // latest Confirm value from processed input packet
+ ui64 NumDataBytes;
+ TDuration Ping;
+
+ TEvUpdateFromInputSession(ui64 confirmedByInput, ui64 numDataBytes, TDuration ping)
+ : ConfirmedByInput(confirmedByInput)
+ , NumDataBytes(numDataBytes)
+ , Ping(ping)
+ {
+ }
+ };
+
+ struct TEvConfirmUpdate : TEventLocal<TEvConfirmUpdate, static_cast<ui32>(ENetwork::EvConfirmUpdate)>
+ {};
+
+ struct TEvSessionBufferSizeRequest : TEventLocal<TEvSessionBufferSizeRequest, static_cast<ui32>(ENetwork::EvSessionBufferSizeRequest)> {
+        DEFINE_SIMPLE_LOCAL_EVENT(TEvSessionBufferSizeRequest, "Network: TEvSessionBufferSizeRequest")
+ };
+
+ struct TEvSessionBufferSizeResponse : TEventLocal<TEvSessionBufferSizeResponse, static_cast<ui32>(ENetwork::EvSessionBufferSizeResponse)> {
+ TEvSessionBufferSizeResponse(const TActorId& sessionId, ui64 outputBufferSize)
+ : SessionID(sessionId)
+ , BufferSize(outputBufferSize)
+ {
+ }
+
+ TActorId SessionID;
+ ui64 BufferSize;
+ };
+
+ struct TEvProcessPingRequest : TEventLocal<TEvProcessPingRequest, static_cast<ui32>(ENetwork::EvProcessPingRequest)> {
+ const ui64 Payload;
+
+ TEvProcessPingRequest(ui64 payload)
+ : Payload(payload)
+ {}
+ };
+
+ struct TEvGetSecureSocket : TEventLocal<TEvGetSecureSocket, (ui32)ENetwork::EvGetSecureSocket> {
+ TIntrusivePtr<NInterconnect::TStreamSocket> Socket;
+
+ TEvGetSecureSocket(TIntrusivePtr<NInterconnect::TStreamSocket> socket)
+ : Socket(std::move(socket))
+ {}
+ };
+
+ struct TEvSecureSocket : TEventLocal<TEvSecureSocket, (ui32)ENetwork::EvSecureSocket> {
+ TIntrusivePtr<NInterconnect::TSecureSocket> Socket;
+
+ TEvSecureSocket(TIntrusivePtr<NInterconnect::TSecureSocket> socket)
+ : Socket(std::move(socket))
+ {}
+ };
+
+}
diff --git a/library/cpp/actors/interconnect/interconnect.h b/library/cpp/actors/interconnect/interconnect.h
new file mode 100644
index 0000000000..225a5243fd
--- /dev/null
+++ b/library/cpp/actors/interconnect/interconnect.h
@@ -0,0 +1,179 @@
+#pragma once
+
+#include <library/cpp/actors/core/actorsystem.h>
+#include <library/cpp/actors/core/interconnect.h>
+#include <util/generic/map.h>
+#include <util/network/address.h>
+
+namespace NActors {
+ struct TInterconnectGlobalState: public TThrRefBase {
+ TString SelfAddress;
+ ui32 SelfPort;
+
+        TVector<TActorId> GlobalNameservers; // todo: add some info (like expected reply time)
+ };
+
+ struct TInterconnectProxySetup: public TThrRefBase {
+ // synchronous (session -> proxy)
+ struct IProxy : TNonCopyable {
+ virtual ~IProxy() {
+ }
+
+ virtual void ActivateSession(const TActorContext& ctx) = 0; // session activated
+ virtual void DetachSession(const TActorContext& ctx) = 0; // session is dead
+ };
+
+ // synchronous (proxy -> session)
+ struct ISession : TNonCopyable {
+ virtual ~ISession() {
+ }
+
+            virtual void DetachSession(const TActorContext& ownerCtx, const TActorContext& sessionCtx) = 0; // kill yourself
+            virtual void ForwardPacket(TAutoPtr<IEventHandle>& ev, const TActorContext& ownerCtx, const TActorContext& sessionCtx) = 0; // receive a packet to forward
+            virtual void Connect(const TActorContext& ownerCtx, const TActorContext& sessionCtx) = 0; // begin connection
+            virtual bool ReceiveIncomingSession(TAutoPtr<IEventHandle>& ev, const TActorContext& ownerCtx, const TActorContext& sessionCtx) = 0; // handle an incoming session; if it returns true, the session is dead and must be recreated
+ };
+
+ ui32 DestinationNode;
+
+        TString StaticAddress; // if set, used as the main destination address
+ int StaticPort;
+
+ TIntrusivePtr<TInterconnectGlobalState> GlobalState;
+
+        virtual IActor* CreateSession(const TActorId& ownerId, IProxy* owner) = 0; // the returned actor is the session; it is attached to the same mailbox as the proxy to allow synchronous calls
+ virtual TActorSetupCmd CreateAcceptor() = 0;
+ };
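+    // Editor's note: an illustrative stub, not part of the original change. TMySession is a
+    // hypothetical name; the bodies are placeholders showing only the shape of the synchronous
+    // proxy <-> session contract declared above:
+    //
+    //     struct TMySession : TInterconnectProxySetup::ISession {
+    //         void DetachSession(const TActorContext&, const TActorContext&) override {}
+    //         void ForwardPacket(TAutoPtr<IEventHandle>&, const TActorContext&, const TActorContext&) override {}
+    //         void Connect(const TActorContext&, const TActorContext&) override {}
+    //         bool ReceiveIncomingSession(TAutoPtr<IEventHandle>&, const TActorContext&, const TActorContext&) override {
+    //             return false; // keep the current session alive
+    //         }
+    //     };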
+
+ struct TNameserverSetup {
+ TActorId ServiceID;
+
+ TIntrusivePtr<TInterconnectGlobalState> GlobalState;
+ };
+
+ struct TTableNameserverSetup: public TThrRefBase {
+ struct TNodeInfo {
+ TString Address;
+ TString Host;
+ TString ResolveHost;
+ ui16 Port;
+ TNodeLocation Location;
+            TString& first;  // pair-style alias for Address (kept for the test helpers below)
+            ui16& second;    // pair-style alias for Port
+
+ TNodeInfo()
+ : first(Address)
+ , second(Port)
+ {
+ }
+
+            TNodeInfo(const TNodeInfo& other)
+                : TNodeInfo()
+            {
+                *this = other; // the defaulted copy constructor would bind first/second to the source object
+            }
+
+ // for testing purposes only
+ TNodeInfo(const TString& address, const TString& host, ui16 port)
+ : TNodeInfo()
+ {
+ Address = address;
+ Host = host;
+ ResolveHost = host;
+ Port = port;
+ }
+
+ TNodeInfo(const TString& address,
+ const TString& host,
+ const TString& resolveHost,
+ ui16 port,
+ const TNodeLocation& location)
+ : TNodeInfo()
+ {
+ Address = address;
+ Host = host;
+ ResolveHost = resolveHost;
+ Port = port;
+ Location = location;
+ }
+
+ // for testing purposes only
+ TNodeInfo& operator=(const std::pair<TString, ui32>& pr) {
+ Address = pr.first;
+ Host = pr.first;
+ ResolveHost = pr.first;
+ Port = pr.second;
+ return *this;
+ }
+
+ TNodeInfo& operator=(const TNodeInfo& ni) {
+ Address = ni.Address;
+ Host = ni.Host;
+ ResolveHost = ni.ResolveHost;
+ Port = ni.Port;
+ Location = ni.Location;
+ return *this;
+ }
+ };
+
+ TMap<ui32, TNodeInfo> StaticNodeTable;
+
+ bool IsEntriesUnique() const;
+ };
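+    // Editor's note: a usage sketch, not part of the original change; node ids and endpoints
+    // are invented. Both the explicit constructor and the pair-style assignment (testing only)
+    // fill the same table:
+    //
+    //     auto setup = MakeIntrusive<TTableNameserverSetup>();
+    //     setup->StaticNodeTable[1] = TTableNameserverSetup::TNodeInfo("::1", "localhost", 19001);
+    //     setup->StaticNodeTable[2] = std::make_pair(TString("10.0.0.2"), ui32(19002));
+    //     Y_VERIFY(setup->IsEntriesUnique());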
+
+ struct TNodeRegistrarSetup {
+ TActorId ServiceID;
+
+ TIntrusivePtr<TInterconnectGlobalState> GlobalState;
+ };
+
+ TActorId GetNameserviceActorId();
+
+ /**
+ * Const table-lookup based name service
+ */
+
+ IActor* CreateNameserverTable(
+ const TIntrusivePtr<TTableNameserverSetup>& setup,
+ ui32 poolId = 0);
+
+ /**
+ * Name service which can be paired with external discovery service.
+ * Copies information from setup on the start (table may be empty).
+ * Handles TEvNodesInfo to change list of known nodes.
+ *
+     * If PendingPeriod is not zero, requests for an unknown nodeId are held for that period,
+     * awaiting node information, before failing
+ */
+
+ IActor* CreateDynamicNameserver(
+ const TIntrusivePtr<TTableNameserverSetup>& setup,
+ const TDuration& pendingPeriod = TDuration::Zero(),
+ ui32 poolId = 0);
+
+ /**
+ * Creates an actor that resolves host/port and replies with either:
+ *
+ * - TEvLocalNodeInfo on success
+ * - TEvResolveError on errors
+ *
+ * Optional defaultAddress may be used as fallback.
+ */
+ IActor* CreateResolveActor(
+ const TString& host, ui16 port, ui32 nodeId, const TString& defaultAddress,
+ const TActorId& replyTo, const TActorId& replyFrom, TInstant deadline);
+
+ inline IActor* CreateResolveActor(
+ ui32 nodeId, const TTableNameserverSetup::TNodeInfo& nodeInfo,
+ const TActorId& replyTo, const TActorId& replyFrom, TInstant deadline)
+ {
+ return CreateResolveActor(nodeInfo.ResolveHost, nodeInfo.Port, nodeId, nodeInfo.Address,
+ replyTo, replyFrom, deadline);
+ }
+
+ /**
+ * Creates an actor that resolves host/port and replies with either:
+ *
+ * - TEvAddressInfo on success
+ * - TEvResolveError on errors
+ */
+ IActor* CreateResolveActor(
+ const TString& host, ui16 port,
+ const TActorId& replyTo, const TActorId& replyFrom, TInstant deadline);
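+    // Editor's note: a usage sketch, not part of the original change; actorSystem and selfId
+    // are hypothetical. The resolve actor replies to replyTo with TEvAddressInfo on success or
+    // TEvResolveError on failure, per the comment above:
+    //
+    //     actorSystem->Register(CreateResolveActor(
+    //         "example.com", 19001, selfId, selfId, TInstant::Now() + TDuration::Seconds(5)));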
+
+}
diff --git a/library/cpp/actors/interconnect/interconnect_address.cpp b/library/cpp/actors/interconnect/interconnect_address.cpp
new file mode 100644
index 0000000000..8f474f5a39
--- /dev/null
+++ b/library/cpp/actors/interconnect/interconnect_address.cpp
@@ -0,0 +1,94 @@
+#include "interconnect_address.h"
+
+#include <util/string/cast.h>
+#include <util/system/file.h>
+
+#if defined(_linux_)
+#include <sys/un.h>
+#include <sys/stat.h>
+#endif
+
+namespace NInterconnect {
+ TAddress::TAddress() {
+ memset(&Addr, 0, sizeof(Addr));
+ }
+
+ TAddress::TAddress(NAddr::IRemoteAddr& addr) {
+ socklen_t len = addr.Len();
+ Y_VERIFY(len <= sizeof(Addr));
+ memcpy(&Addr.Generic, addr.Addr(), len);
+ }
+
+ int TAddress::GetFamily() const {
+ return Addr.Generic.sa_family;
+ }
+
+ socklen_t TAddress::Size() const {
+ switch (Addr.Generic.sa_family) {
+ case AF_INET6:
+ return sizeof(sockaddr_in6);
+ case AF_INET:
+ return sizeof(sockaddr_in);
+ default:
+ return 0;
+ }
+ }
+
+ sockaddr* TAddress::SockAddr() {
+ return &Addr.Generic;
+ }
+
+ const sockaddr* TAddress::SockAddr() const {
+ return &Addr.Generic;
+ }
+
+ ui16 TAddress::GetPort() const {
+ switch (Addr.Generic.sa_family) {
+ case AF_INET6:
+ return ntohs(Addr.Ipv6.sin6_port);
+ case AF_INET:
+ return ntohs(Addr.Ipv4.sin_port);
+ default:
+ return 0;
+ }
+ }
+
+ TString TAddress::ToString() const {
+ return GetAddress() + ":" + ::ToString(GetPort());
+ }
+
+    TAddress::TAddress(const char* addr, ui16 port) {
+        memset(&Addr, 0, sizeof(Addr));
+        // try IPv6 first, then IPv4; the address family is assigned inline so that the
+        // branch that succeeds leaves Addr fully initialized
+        if (inet_pton(Addr.Ipv6.sin6_family = AF_INET6, addr, &Addr.Ipv6.sin6_addr)) {
+            Addr.Ipv6.sin6_port = htons(port);
+        } else if (inet_pton(Addr.Ipv4.sin_family = AF_INET, addr, &Addr.Ipv4.sin_addr)) {
+            Addr.Ipv4.sin_port = htons(port);
+        }
+    }
+
+ TAddress::TAddress(const TString& addr, ui16 port)
+ : TAddress(addr.data(), port)
+ {}
+
+ TString TAddress::GetAddress() const {
+ const void *src;
+ socklen_t size;
+
+ switch (Addr.Generic.sa_family) {
+ case AF_INET6:
+ std::tie(src, size) = std::make_tuple(&Addr.Ipv6.sin6_addr, INET6_ADDRSTRLEN);
+ break;
+
+ case AF_INET:
+ std::tie(src, size) = std::make_tuple(&Addr.Ipv4.sin_addr, INET_ADDRSTRLEN);
+ break;
+
+ default:
+ return TString();
+ }
+
+ char *buffer = static_cast<char*>(alloca(size));
+ const char *p = inet_ntop(Addr.Generic.sa_family, const_cast<void*>(src), buffer, size);
+ return p ? TString(p) : TString();
+ }
+}
diff --git a/library/cpp/actors/interconnect/interconnect_address.h b/library/cpp/actors/interconnect/interconnect_address.h
new file mode 100644
index 0000000000..e9e0faec81
--- /dev/null
+++ b/library/cpp/actors/interconnect/interconnect_address.h
@@ -0,0 +1,29 @@
+#pragma once
+
+#include <util/system/defaults.h>
+#include <util/network/init.h>
+#include <util/network/address.h>
+#include <util/generic/string.h>
+
+namespace NInterconnect {
+ class TAddress {
+ union {
+ sockaddr Generic;
+ sockaddr_in Ipv4;
+ sockaddr_in6 Ipv6;
+ } Addr;
+
+ public:
+ TAddress();
+ TAddress(const char* addr, ui16 port);
+ TAddress(const TString& addr, ui16 port);
+ TAddress(NAddr::IRemoteAddr& addr);
+ int GetFamily() const;
+ socklen_t Size() const;
+ ::sockaddr* SockAddr();
+ const ::sockaddr* SockAddr() const;
+ ui16 GetPort() const;
+ TString GetAddress() const;
+ TString ToString() const;
+ };
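+
+    // Editor's note: a brief usage sketch, not part of the original change, relying only on
+    // the constructors and accessors declared above:
+    //
+    //     NInterconnect::TAddress addr("::1", 19001);
+    //     Y_VERIFY(addr.GetFamily() == AF_INET6);
+    //     Cerr << addr.ToString() << Endl; // prints "::1:19001"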
+}
diff --git a/library/cpp/actors/interconnect/interconnect_channel.cpp b/library/cpp/actors/interconnect/interconnect_channel.cpp
new file mode 100644
index 0000000000..a66ba2a154
--- /dev/null
+++ b/library/cpp/actors/interconnect/interconnect_channel.cpp
@@ -0,0 +1,176 @@
+#include "interconnect_channel.h"
+
+#include <library/cpp/actors/core/events.h>
+#include <library/cpp/actors/core/executor_thread.h>
+#include <library/cpp/actors/core/log.h>
+#include <library/cpp/actors/core/probes.h>
+#include <library/cpp/actors/protos/services_common.pb.h>
+#include <library/cpp/actors/prof/tag.h>
+#include <library/cpp/digest/crc32c/crc32c.h>
+
+LWTRACE_USING(ACTORLIB_PROVIDER);
+
+namespace NActors {
+ DECLARE_WILSON_EVENT(EventSentToSocket);
+ DECLARE_WILSON_EVENT(EventReceivedFromSocket);
+
+ bool TEventOutputChannel::FeedDescriptor(TTcpPacketOutTask& task, TEventHolder& event, ui64 *weightConsumed) {
+ const size_t amount = sizeof(TChannelPart) + sizeof(TEventDescr);
+ if (task.GetVirtualFreeAmount() < amount) {
+ return false;
+ }
+
+ NWilson::TTraceId traceId(event.Descr.TraceId);
+// if (ctx) {
+// WILSON_TRACE(*ctx, &traceId, EventSentToSocket);
+// }
+ traceId.Serialize(&event.Descr.TraceId);
+ LWTRACK(SerializeToPacketEnd, event.Orbit, PeerNodeId, ChannelId, OutputQueueSize, task.GetDataSize());
+ task.Orbit.Take(event.Orbit);
+
+ event.Descr.Flags = (event.Descr.Flags & ~IEventHandle::FlagForwardOnNondelivery) |
+ (ExtendedFormat ? IEventHandle::FlagExtendedFormat : 0);
+
+ TChannelPart *part = static_cast<TChannelPart*>(task.GetFreeArea());
+ part->Channel = ChannelId | TChannelPart::LastPartFlag;
+ part->Size = sizeof(TEventDescr);
+ memcpy(part + 1, &event.Descr, sizeof(TEventDescr));
+ task.AppendBuf(part, amount);
+ *weightConsumed += amount;
+ OutputQueueSize -= part->Size;
+ Metrics->UpdateOutputChannelEvents(ChannelId);
+
+ return true;
+ }
+
+ void TEventOutputChannel::DropConfirmed(ui64 confirm) {
+ LOG_DEBUG_IC_SESSION("ICOCH98", "Dropping confirmed messages");
+ for (auto it = NotYetConfirmed.begin(); it != NotYetConfirmed.end() && it->Serial <= confirm; ) {
+ Pool.Release(NotYetConfirmed, it++);
+ }
+ }
+
+ bool TEventOutputChannel::FeedBuf(TTcpPacketOutTask& task, ui64 serial, ui64 *weightConsumed) {
+ for (;;) {
+ Y_VERIFY(!Queue.empty());
+ TEventHolder& event = Queue.front();
+
+ switch (State) {
+ case EState::INITIAL:
+ event.InitChecksum();
+ LWTRACK(SerializeToPacketBegin, event.Orbit, PeerNodeId, ChannelId, OutputQueueSize);
+ if (event.Event) {
+ State = EState::CHUNKER;
+ IEventBase *base = event.Event.Get();
+ Chunker.SetSerializingEvent(base);
+ ExtendedFormat = base->IsExtendedFormat();
+ } else if (event.Buffer) {
+ State = EState::BUFFER;
+ Iter = event.Buffer->GetBeginIter();
+ ExtendedFormat = event.Buffer->IsExtendedFormat();
+ } else {
+ State = EState::DESCRIPTOR;
+ ExtendedFormat = false;
+ }
+ break;
+
+ case EState::CHUNKER:
+ case EState::BUFFER: {
+ size_t maxBytes = task.GetVirtualFreeAmount();
+ if (maxBytes <= sizeof(TChannelPart)) {
+ return false;
+ }
+
+ TChannelPart *part = static_cast<TChannelPart*>(task.GetFreeArea());
+ part->Channel = ChannelId;
+ part->Size = 0;
+ task.AppendBuf(part, sizeof(TChannelPart));
+ maxBytes -= sizeof(TChannelPart);
+ Y_VERIFY(maxBytes);
+
+ auto addChunk = [&](const void *data, size_t len) {
+ event.UpdateChecksum(Params, data, len);
+ task.AppendBuf(data, len);
+ part->Size += len;
+ Y_VERIFY_DEBUG(maxBytes >= len);
+ maxBytes -= len;
+
+ event.EventActuallySerialized += len;
+ if (event.EventActuallySerialized > MaxSerializedEventSize) {
+ throw TExSerializedEventTooLarge(event.Descr.Type);
+ }
+ };
+
+ bool complete = false;
+ if (State == EState::CHUNKER) {
+ Y_VERIFY_DEBUG(task.GetFreeArea() == part + 1);
+ while (!complete && maxBytes) {
+ const auto [first, last] = Chunker.FeedBuf(task.GetFreeArea(), maxBytes);
+ for (auto p = first; p != last; ++p) {
+ addChunk(p->first, p->second);
+ }
+ complete = Chunker.IsComplete();
+ }
+ Y_VERIFY(!complete || Chunker.IsSuccessfull());
+ Y_VERIFY_DEBUG(complete || !maxBytes);
+ } else { // BUFFER
+ while (const size_t numb = Min(maxBytes, Iter.ContiguousSize())) {
+ const char *obuf = Iter.ContiguousData();
+ addChunk(obuf, numb);
+ Iter += numb;
+ }
+ complete = !Iter.Valid();
+ }
+ if (complete) {
+ Y_VERIFY(event.EventActuallySerialized == event.EventSerializedSize,
+ "EventActuallySerialized# %" PRIu32 " EventSerializedSize# %" PRIu32 " Type# 0x%08" PRIx32,
+ event.EventActuallySerialized, event.EventSerializedSize, event.Descr.Type);
+ }
+
+ if (!part->Size) {
+ task.Undo(sizeof(TChannelPart));
+ } else {
+ *weightConsumed += sizeof(TChannelPart) + part->Size;
+ OutputQueueSize -= part->Size;
+ }
+ if (complete) {
+ State = EState::DESCRIPTOR;
+ }
+ break;
+ }
+
+ case EState::DESCRIPTOR:
+ if (!FeedDescriptor(task, event, weightConsumed)) {
+ return false;
+ }
+ event.Serial = serial;
+ NotYetConfirmed.splice(NotYetConfirmed.end(), Queue, Queue.begin()); // move event to not-yet-confirmed queue
+ State = EState::INITIAL;
+ return true; // we have processed whole event, signal to the caller
+ }
+ }
+ }
+
+ void TEventOutputChannel::NotifyUndelivered() {
+ LOG_DEBUG_IC_SESSION("ICOCH89", "Notyfying about Undelivered messages! NotYetConfirmed size: %zu, Queue size: %zu", NotYetConfirmed.size(), Queue.size());
+ if (State == EState::CHUNKER) {
+ Y_VERIFY(!Chunker.IsComplete()); // chunk must have an event being serialized
+ Y_VERIFY(!Queue.empty()); // this event must be the first event in queue
+ TEventHolder& event = Queue.front();
+ Y_VERIFY(Chunker.GetCurrentEvent() == event.Event.Get()); // ensure the event is valid
+ Chunker.Abort(); // stop serializing current event
+ Y_VERIFY(Chunker.IsComplete());
+ }
+ for (auto& item : NotYetConfirmed) {
+ if (item.Descr.Flags & IEventHandle::FlagGenerateUnsureUndelivered) { // notify only when unsure flag is set
+ item.ForwardOnNondelivery(true);
+ }
+ }
+ Pool.Release(NotYetConfirmed);
+ for (auto& item : Queue) {
+ item.ForwardOnNondelivery(false);
+ }
+ Pool.Release(Queue);
+ }
+
+}
diff --git a/library/cpp/actors/interconnect/interconnect_channel.h b/library/cpp/actors/interconnect/interconnect_channel.h
new file mode 100644
index 0000000000..e4a0ae3cda
--- /dev/null
+++ b/library/cpp/actors/interconnect/interconnect_channel.h
@@ -0,0 +1,127 @@
+#pragma once
+
+#include <library/cpp/monlib/dynamic_counters/counters.h>
+#include <library/cpp/actors/core/actorsystem.h>
+#include <library/cpp/actors/core/event_load.h>
+#include <library/cpp/actors/util/rope.h>
+#include <util/generic/deque.h>
+#include <util/generic/vector.h>
+#include <util/generic/map.h>
+#include <util/stream/walk.h>
+#include <library/cpp/actors/wilson/wilson_event.h>
+#include <library/cpp/actors/helpers/mon_histogram_helper.h>
+
+#include "interconnect_common.h"
+#include "interconnect_counters.h"
+#include "packet.h"
+#include "event_holder_pool.h"
+
+namespace NActors {
+#pragma pack(push, 1)
+ struct TChannelPart {
+ ui16 Channel;
+ ui16 Size;
+
+ static constexpr ui16 LastPartFlag = ui16(1) << 15;
+
+ TString ToString() const {
+ return TStringBuilder() << "{Channel# " << (Channel & ~LastPartFlag)
+ << " LastPartFlag# " << ((Channel & LastPartFlag) ? "true" : "false")
+ << " Size# " << Size << "}";
+ }
+ };
+#pragma pack(pop)
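+
+    // Editor's note: an illustrative decoding sketch, not part of the original change; `data`
+    // is a hypothetical pointer at a received part header:
+    //
+    //     TChannelPart part;
+    //     memcpy(&part, data, sizeof(part));
+    //     const ui16 channel = part.Channel & ~TChannelPart::LastPartFlag;
+    //     const bool last = part.Channel & TChannelPart::LastPartFlag;
+    //     // the next part.Size bytes belong to `channel`; `last` marks the event's final chunk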
+
+ struct TExSerializedEventTooLarge : std::exception {
+ const ui32 Type;
+
+ TExSerializedEventTooLarge(ui32 type)
+ : Type(type)
+ {}
+ };
+
+ class TEventOutputChannel : public TInterconnectLoggingBase {
+ public:
+ TEventOutputChannel(TEventHolderPool& pool, ui16 id, ui32 peerNodeId, ui32 maxSerializedEventSize,
+ std::shared_ptr<IInterconnectMetrics> metrics, TSessionParams params)
+ : TInterconnectLoggingBase(Sprintf("OutputChannel %" PRIu16 " [node %" PRIu32 "]", id, peerNodeId))
+ , Pool(pool)
+ , PeerNodeId(peerNodeId)
+ , ChannelId(id)
+ , Metrics(std::move(metrics))
+ , Params(std::move(params))
+ , MaxSerializedEventSize(maxSerializedEventSize)
+ {}
+
+ ~TEventOutputChannel() {
+ }
+
+ std::pair<ui32, TEventHolder*> Push(IEventHandle& ev) {
+ TEventHolder& event = Pool.Allocate(Queue);
+ const ui32 bytes = event.Fill(ev) + sizeof(TEventDescr);
+ OutputQueueSize += bytes;
+ return std::make_pair(bytes, &event);
+ }
+
+ void DropConfirmed(ui64 confirm);
+
+ bool FeedBuf(TTcpPacketOutTask& task, ui64 serial, ui64 *weightConsumed);
+
+ bool IsEmpty() const {
+ return Queue.empty();
+ }
+
+ bool IsWorking() const {
+ return !IsEmpty();
+ }
+
+ ui32 GetQueueSize() const {
+ return (ui32)Queue.size();
+ }
+
+ ui64 GetBufferedAmountOfData() const {
+ return OutputQueueSize;
+ }
+
+ void NotifyUndelivered();
+
+ TEventHolderPool& Pool;
+ const ui32 PeerNodeId;
+ const ui16 ChannelId;
+ std::shared_ptr<IInterconnectMetrics> Metrics;
+ const TSessionParams Params;
+ const ui32 MaxSerializedEventSize;
+ ui64 UnaccountedTraffic = 0;
+ ui64 EqualizeCounterOnPause = 0;
+ ui64 WeightConsumedOnPause = 0;
+
+ enum class EState {
+ INITIAL,
+ CHUNKER,
+ BUFFER,
+ DESCRIPTOR,
+ };
+ EState State = EState::INITIAL;
+
+ static constexpr ui16 MinimumFreeSpace = sizeof(TChannelPart) + sizeof(TEventDescr);
+
+ protected:
+ ui64 OutputQueueSize = 0;
+
+ std::list<TEventHolder> Queue;
+ std::list<TEventHolder> NotYetConfirmed;
+ TRope::TConstIterator Iter;
+ TCoroutineChunkSerializer Chunker;
+ bool ExtendedFormat = false;
+
+ bool FeedDescriptor(TTcpPacketOutTask& task, TEventHolder& event, ui64 *weightConsumed);
+
+ void AccountTraffic() {
+ if (const ui64 amount = std::exchange(UnaccountedTraffic, 0)) {
+ Metrics->UpdateOutputChannelTraffic(ChannelId, amount);
+ }
+ }
+
+ friend class TInterconnectSessionTCP;
+ };
+}
diff --git a/library/cpp/actors/interconnect/interconnect_common.h b/library/cpp/actors/interconnect/interconnect_common.h
new file mode 100644
index 0000000000..285709a00c
--- /dev/null
+++ b/library/cpp/actors/interconnect/interconnect_common.h
@@ -0,0 +1,106 @@
+#pragma once
+
+#include <library/cpp/actors/core/actorid.h>
+#include <library/cpp/actors/core/actorsystem.h>
+#include <library/cpp/actors/util/datetime.h>
+#include <library/cpp/monlib/dynamic_counters/counters.h>
+#include <library/cpp/monlib/metrics/metric_registry.h>
+#include <util/generic/map.h>
+#include <util/generic/set.h>
+#include <util/system/datetime.h>
+
+#include "poller_tcp.h"
+#include "logging.h"
+#include "event_filter.h"
+
+#include <atomic>
+
+namespace NActors {
+ enum class EEncryptionMode {
+ DISABLED, // no encryption is required at all
+ OPTIONAL, // encryption is enabled when supported by both peers
+ REQUIRED, // encryption is mandatory
+ };
+
+ struct TInterconnectSettings {
+ TDuration Handshake;
+ TDuration DeadPeer;
+ TDuration CloseOnIdle;
+ ui32 SendBufferDieLimitInMB = 0;
+ ui64 OutputBuffersTotalSizeLimitInMB = 0;
+ ui32 TotalInflightAmountOfData = 0;
+ bool MergePerPeerCounters = false;
+ bool MergePerDataCenterCounters = false;
+ ui32 TCPSocketBufferSize = 0;
+ TDuration PingPeriod = TDuration::Seconds(3);
+ TDuration ForceConfirmPeriod = TDuration::Seconds(1);
+ TDuration LostConnection;
+ TDuration BatchPeriod;
+ bool BindOnAllAddresses = true;
+ EEncryptionMode EncryptionMode = EEncryptionMode::DISABLED;
+ bool TlsAuthOnly = false;
+ TString Certificate; // certificate data in PEM format
+ TString PrivateKey; // private key for the certificate in PEM format
+ TString CaFilePath; // path to certificate authority file
+ TString CipherList; // encryption algorithms
+ TDuration MessagePendingTimeout = TDuration::Seconds(1); // timeout for which messages are queued while in PendingConnection state
+ ui64 MessagePendingSize = Max<ui64>(); // size of the queue
+ ui32 MaxSerializedEventSize = NActors::EventMaxByteSize;
+
+ ui32 GetSendBufferSize() const {
+            ui32 res = 512 * 1024; // 512 KB is the default send buffer size
+ if (TCPSocketBufferSize) {
+ res = TCPSocketBufferSize;
+ }
+ return res;
+ }
+ };
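+
+    // Editor's note: a configuration sketch, not part of the original change; the values are
+    // arbitrary examples, not recommendations:
+    //
+    //     TInterconnectSettings settings;
+    //     settings.Handshake = TDuration::Seconds(5);
+    //     settings.DeadPeer = TDuration::Seconds(10);
+    //     settings.EncryptionMode = EEncryptionMode::OPTIONAL;
+    //     const ui32 sndbuf = settings.GetSendBufferSize(); // 512 KB unless TCPSocketBufferSize is set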
+
+ struct TChannelSettings {
+ ui16 Weight;
+ };
+
+ typedef TMap<ui16, TChannelSettings> TChannelsConfig;
+
+ using TRegisterMonPageCallback = std::function<void(const TString& path, const TString& title,
+ TActorSystem* actorSystem, const TActorId& actorId)>;
+
+ using TInitWhiteboardCallback = std::function<void(ui16 icPort, TActorSystem* actorSystem)>;
+
+ using TUpdateWhiteboardCallback = std::function<void(const TString& peer, bool connected, bool green, bool yellow,
+ bool orange, bool red, TActorSystem* actorSystem)>;
+
+ struct TInterconnectProxyCommon : TAtomicRefCount<TInterconnectProxyCommon> {
+ TActorId NameserviceId;
+ NMonitoring::TDynamicCounterPtr MonCounters;
+ std::shared_ptr<NMonitoring::IMetricRegistry> Metrics;
+ TChannelsConfig ChannelsConfig;
+ TInterconnectSettings Settings;
+ TRegisterMonPageCallback RegisterMonPage;
+ TActorId DestructorId;
+ std::shared_ptr<std::atomic<TAtomicBase>> DestructorQueueSize;
+ TAtomicBase MaxDestructorQueueSize = 1024 * 1024 * 1024;
+ TString ClusterUUID;
+ TVector<TString> AcceptUUID;
+ ui64 StartTime = GetCycleCountFast();
+ TString TechnicalSelfHostName;
+ TInitWhiteboardCallback InitWhiteboard;
+ TUpdateWhiteboardCallback UpdateWhiteboard;
+ ui32 HandshakeBallastSize = 0;
+ TAtomic StartedSessionKiller = 0;
+ TScopeId LocalScopeId;
+ std::shared_ptr<TEventFilter> EventFilter;
+ TString Cookie; // unique random identifier of a node instance (generated randomly at every start)
+ std::unordered_map<ui16, TString> ChannelName;
+
+ struct TVersionInfo {
+ TString Tag; // version tag for this node
+ TSet<TString> AcceptedTags; // we accept all enlisted version tags of peer nodes, but no others; empty = accept all
+ };
+
+ TMaybe<TVersionInfo> VersionInfo;
+
+ using TPtr = TIntrusivePtr<TInterconnectProxyCommon>;
+ };
+
+}
diff --git a/library/cpp/actors/interconnect/interconnect_counters.cpp b/library/cpp/actors/interconnect/interconnect_counters.cpp
new file mode 100644
index 0000000000..ba674f664b
--- /dev/null
+++ b/library/cpp/actors/interconnect/interconnect_counters.cpp
@@ -0,0 +1,692 @@
+#include "interconnect_counters.h"
+
+#include <library/cpp/monlib/metrics/metric_registry.h>
+#include <library/cpp/monlib/metrics/metric_sub_registry.h>
+
+#include <unordered_map>
+
+namespace NActors {
+
+namespace {
+
+ class TInterconnectCounters: public IInterconnectMetrics {
+ public:
+ struct TOutputChannel {
+ NMonitoring::TDynamicCounters::TCounterPtr Traffic;
+ NMonitoring::TDynamicCounters::TCounterPtr Events;
+ NMonitoring::TDynamicCounters::TCounterPtr OutgoingTraffic;
+ NMonitoring::TDynamicCounters::TCounterPtr OutgoingEvents;
+
+ TOutputChannel() = default;
+
+ TOutputChannel(const TIntrusivePtr<NMonitoring::TDynamicCounters>& counters,
+ NMonitoring::TDynamicCounters::TCounterPtr traffic,
+ NMonitoring::TDynamicCounters::TCounterPtr events)
+ : Traffic(std::move(traffic))
+ , Events(std::move(events))
+ , OutgoingTraffic(counters->GetCounter("OutgoingTraffic", true))
+ , OutgoingEvents(counters->GetCounter("OutgoingEvents", true))
+ {}
+
+ TOutputChannel(const TOutputChannel&) = default;
+ };
+
+ struct TInputChannel {
+ NMonitoring::TDynamicCounters::TCounterPtr Traffic;
+ NMonitoring::TDynamicCounters::TCounterPtr Events;
+ NMonitoring::TDynamicCounters::TCounterPtr ScopeErrors;
+ NMonitoring::TDynamicCounters::TCounterPtr IncomingTraffic;
+ NMonitoring::TDynamicCounters::TCounterPtr IncomingEvents;
+
+ TInputChannel() = default;
+
+ TInputChannel(const TIntrusivePtr<NMonitoring::TDynamicCounters>& counters,
+ NMonitoring::TDynamicCounters::TCounterPtr traffic,
+ NMonitoring::TDynamicCounters::TCounterPtr events,
+ NMonitoring::TDynamicCounters::TCounterPtr scopeErrors)
+ : Traffic(std::move(traffic))
+ , Events(std::move(events))
+ , ScopeErrors(std::move(scopeErrors))
+ , IncomingTraffic(counters->GetCounter("IncomingTraffic", true))
+ , IncomingEvents(counters->GetCounter("IncomingEvents", true))
+ {}
+
+ TInputChannel(const TInputChannel&) = default;
+ };
+
+ struct TInputChannels : std::unordered_map<ui16, TInputChannel> {
+ TInputChannel OtherInputChannel;
+
+ TInputChannels() = default;
+
+ TInputChannels(const TIntrusivePtr<NMonitoring::TDynamicCounters>& counters,
+ const std::unordered_map<ui16, TString>& names,
+ NMonitoring::TDynamicCounters::TCounterPtr traffic,
+ NMonitoring::TDynamicCounters::TCounterPtr events,
+ NMonitoring::TDynamicCounters::TCounterPtr scopeErrors)
+ : OtherInputChannel(counters->GetSubgroup("channel", "other"), traffic, events, scopeErrors)
+ {
+ for (const auto& [id, name] : names) {
+ try_emplace(id, counters->GetSubgroup("channel", name), traffic, events, scopeErrors);
+ }
+ }
+
+ TInputChannels(const TInputChannels&) = default;
+
+ const TInputChannel& Get(ui16 id) const {
+ const auto it = find(id);
+ return it != end() ? it->second : OtherInputChannel;
+ }
+ };
+
+ private:
+ const TInterconnectProxyCommon::TPtr Common;
+ const bool MergePerDataCenterCounters;
+ const bool MergePerPeerCounters;
+ NMonitoring::TDynamicCounterPtr Counters;
+ NMonitoring::TDynamicCounterPtr PerSessionCounters;
+ NMonitoring::TDynamicCounterPtr PerDataCenterCounters;
+ NMonitoring::TDynamicCounterPtr& AdaptiveCounters;
+
+ bool Initialized = false;
+
+ NMonitoring::TDynamicCounters::TCounterPtr Traffic;
+ NMonitoring::TDynamicCounters::TCounterPtr Events;
+ NMonitoring::TDynamicCounters::TCounterPtr ScopeErrors;
+
+ public:
+ TInterconnectCounters(const TInterconnectProxyCommon::TPtr& common)
+ : Common(common)
+ , MergePerDataCenterCounters(common->Settings.MergePerDataCenterCounters)
+ , MergePerPeerCounters(common->Settings.MergePerPeerCounters)
+ , Counters(common->MonCounters)
+ , AdaptiveCounters(MergePerDataCenterCounters
+ ? PerDataCenterCounters :
+ MergePerPeerCounters ? Counters : PerSessionCounters)
+ {}
+
+ void AddInflightDataAmount(ui64 value) override {
+ *InflightDataAmount += value;
+ }
+
+ void SubInflightDataAmount(ui64 value) override {
+ *InflightDataAmount -= value;
+ }
+
+ void AddTotalBytesWritten(ui64 value) override {
+ *TotalBytesWritten += value;
+ }
+
+ void SetClockSkewMicrosec(i64 value) override {
+ *ClockSkewMicrosec = value;
+ }
+
+ void IncSessionDeaths() override {
+ ++*SessionDeaths;
+ }
+
+ void IncHandshakeFails() override {
+ ++*HandshakeFails;
+ }
+
+ void SetConnected(ui32 value) override {
+ *Connected = value;
+ }
+
+ void IncSubscribersCount() override {
+ ++*SubscribersCount;
+ }
+
+ void SubSubscribersCount(ui32 value) override {
+ *SubscribersCount -= value;
+ }
+
+ void SubOutputBuffersTotalSize(ui64 value) override {
+ *OutputBuffersTotalSize -= value;
+ }
+
+ void AddOutputBuffersTotalSize(ui64 value) override {
+ *OutputBuffersTotalSize += value;
+ }
+
+ ui64 GetOutputBuffersTotalSize() const override {
+ return *OutputBuffersTotalSize;
+ }
+
+ void IncDisconnections() override {
+ ++*Disconnections;
+ }
+
+ void IncUsefulWriteWakeups() override {
+ ++*UsefulWriteWakeups;
+ }
+
+ void IncSpuriousWriteWakeups() override {
+ ++*SpuriousWriteWakeups;
+ }
+
+ void IncSendSyscalls() override {
+ ++*SendSyscalls;
+ }
+
+ void IncInflyLimitReach() override {
+ ++*InflyLimitReach;
+ }
+
+ void IncUsefulReadWakeups() override {
+ ++*UsefulReadWakeups;
+ }
+
+ void IncSpuriousReadWakeups() override {
+ ++*SpuriousReadWakeups;
+ }
+
+ void IncDisconnectByReason(const TString& s) override {
+ if (auto it = DisconnectByReason.find(s); it != DisconnectByReason.end()) {
+ it->second->Inc();
+ }
+ }
+
+ void AddInputChannelsIncomingTraffic(ui16 channel, ui64 incomingTraffic) override {
+ auto& ch = InputChannels.Get(channel);
+ *ch.IncomingTraffic += incomingTraffic;
+ }
+
+ void IncInputChannelsIncomingEvents(ui16 channel) override {
+ auto& ch = InputChannels.Get(channel);
+ ++*ch.IncomingEvents;
+ }
+
+ void IncRecvSyscalls() override {
+ ++*RecvSyscalls;
+ }
+
+ void AddTotalBytesRead(ui64 value) override {
+ *TotalBytesRead += value;
+ }
+
+ void UpdateLegacyPingTimeHist(ui64 value) override {
+ LegacyPingTimeHist.Add(value);
+ PingTimeHistogram->Collect(value);
+ }
+
+ void UpdateOutputChannelTraffic(ui16 channel, ui64 value) override {
+ if (GetOutputChannel(channel).OutgoingTraffic) {
+ *(GetOutputChannel(channel).OutgoingTraffic) += value;
+ }
+ if (GetOutputChannel(channel).Traffic) {
+ *(GetOutputChannel(channel).Traffic) += value;
+ }
+ }
+
+ void UpdateOutputChannelEvents(ui16 channel) override {
+ if (GetOutputChannel(channel).OutgoingEvents) {
+ ++*(GetOutputChannel(channel).OutgoingEvents);
+ }
+ if (GetOutputChannel(channel).Events) {
+ ++*(GetOutputChannel(channel).Events);
+ }
+ }
+
+ void SetPeerInfo(const TString& name, const TString& dataCenterId) override {
+ if (name != std::exchange(HumanFriendlyPeerHostName, name)) {
+ PerSessionCounters.Reset();
+ }
+ VALGRIND_MAKE_READABLE(&DataCenterId, sizeof(DataCenterId));
+ if (dataCenterId != std::exchange(DataCenterId, dataCenterId)) {
+ PerDataCenterCounters.Reset();
+ }
+
+ const bool updatePerDataCenter = !PerDataCenterCounters && MergePerDataCenterCounters;
+ if (updatePerDataCenter) {
+ PerDataCenterCounters = Counters->GetSubgroup("dataCenterId", *DataCenterId);
+ }
+
+ const bool updatePerSession = !PerSessionCounters || updatePerDataCenter;
+ if (updatePerSession) {
+ auto base = MergePerDataCenterCounters ? PerDataCenterCounters : Counters;
+ PerSessionCounters = base->GetSubgroup("peer", *HumanFriendlyPeerHostName);
+ }
+
+ const bool updateGlobal = !Initialized;
+
+ const bool updateAdaptive =
+ &AdaptiveCounters == &Counters ? updateGlobal :
+ &AdaptiveCounters == &PerSessionCounters ? updatePerSession :
+ &AdaptiveCounters == &PerDataCenterCounters ? updatePerDataCenter :
+ false;
+
+ if (updatePerSession) {
+ Connected = PerSessionCounters->GetCounter("Connected");
+ Disconnections = PerSessionCounters->GetCounter("Disconnections", true);
+ ClockSkewMicrosec = PerSessionCounters->GetCounter("ClockSkewMicrosec");
+ Traffic = PerSessionCounters->GetCounter("Traffic", true);
+ Events = PerSessionCounters->GetCounter("Events", true);
+ ScopeErrors = PerSessionCounters->GetCounter("ScopeErrors", true);
+
+ for (const auto& [id, name] : Common->ChannelName) {
+ OutputChannels.try_emplace(id, Counters->GetSubgroup("channel", name), Traffic, Events);
+ }
+ OtherOutputChannel = TOutputChannel(Counters->GetSubgroup("channel", "other"), Traffic, Events);
+
+ InputChannels = TInputChannels(Counters, Common->ChannelName, Traffic, Events, ScopeErrors);
+ }
+
+ if (updateAdaptive) {
+ SessionDeaths = AdaptiveCounters->GetCounter("Session_Deaths", true);
+ HandshakeFails = AdaptiveCounters->GetCounter("Handshake_Fails", true);
+ InflyLimitReach = AdaptiveCounters->GetCounter("InflyLimitReach", true);
+ InflightDataAmount = AdaptiveCounters->GetCounter("Inflight_Data");
+
+ LegacyPingTimeHist = {};
+ LegacyPingTimeHist.Init(AdaptiveCounters.Get(), "PingTimeHist", "mks", 125, 18);
+
+ PingTimeHistogram = AdaptiveCounters->GetHistogram(
+ "PingTimeUs", NMonitoring::ExponentialHistogram(18, 2, 125));
+ }
+
+ if (updateGlobal) {
+ OutputBuffersTotalSize = Counters->GetCounter("OutputBuffersTotalSize");
+ SendSyscalls = Counters->GetCounter("SendSyscalls", true);
+ RecvSyscalls = Counters->GetCounter("RecvSyscalls", true);
+ SpuriousReadWakeups = Counters->GetCounter("SpuriousReadWakeups", true);
+ UsefulReadWakeups = Counters->GetCounter("UsefulReadWakeups", true);
+ SpuriousWriteWakeups = Counters->GetCounter("SpuriousWriteWakeups", true);
+ UsefulWriteWakeups = Counters->GetCounter("UsefulWriteWakeups", true);
+ SubscribersCount = AdaptiveCounters->GetCounter("SubscribersCount");
+ TotalBytesWritten = Counters->GetCounter("TotalBytesWritten", true);
+ TotalBytesRead = Counters->GetCounter("TotalBytesRead", true);
+
+ auto disconnectReasonGroup = Counters->GetSubgroup("subsystem", "disconnectReason");
+ for (const char *reason : TDisconnectReason::Reasons) {
+ DisconnectByReason[reason] = disconnectReasonGroup->GetNamedCounter("reason", reason, true);
+ }
+ }
+
+ Initialized = true;
+ }
+
+ TOutputChannel GetOutputChannel(ui16 index) const {
+ Y_VERIFY(Initialized);
+ const auto it = OutputChannels.find(index);
+ return it != OutputChannels.end() ? it->second : OtherOutputChannel;
+ }
+
+ private:
+ NMonitoring::TDynamicCounters::TCounterPtr SessionDeaths;
+ NMonitoring::TDynamicCounters::TCounterPtr HandshakeFails;
+ NMonitoring::TDynamicCounters::TCounterPtr Connected;
+ NMonitoring::TDynamicCounters::TCounterPtr Disconnections;
+ NMonitoring::TDynamicCounters::TCounterPtr InflightDataAmount;
+ NMonitoring::TDynamicCounters::TCounterPtr InflyLimitReach;
+ NMonitoring::TDynamicCounters::TCounterPtr OutputBuffersTotalSize;
+ NMonitoring::TDynamicCounters::TCounterPtr QueueUtilization;
+ NMonitoring::TDynamicCounters::TCounterPtr SubscribersCount;
+ NMonitoring::TDynamicCounters::TCounterPtr SendSyscalls;
+ NMonitoring::TDynamicCounters::TCounterPtr ClockSkewMicrosec;
+ NMonitoring::TDynamicCounters::TCounterPtr RecvSyscalls;
+ NMonitoring::TDynamicCounters::TCounterPtr UsefulReadWakeups;
+ NMonitoring::TDynamicCounters::TCounterPtr SpuriousReadWakeups;
+ NMonitoring::TDynamicCounters::TCounterPtr UsefulWriteWakeups;
+ NMonitoring::TDynamicCounters::TCounterPtr SpuriousWriteWakeups;
+ NMon::THistogramCounterHelper LegacyPingTimeHist;
+ NMonitoring::THistogramPtr PingTimeHistogram;
+
+ std::unordered_map<ui16, TOutputChannel> OutputChannels;
+ TOutputChannel OtherOutputChannel;
+ TInputChannels InputChannels;
+ THashMap<TString, NMonitoring::TDynamicCounters::TCounterPtr> DisconnectByReason;
+
+ NMonitoring::TDynamicCounters::TCounterPtr TotalBytesWritten, TotalBytesRead;
+ };
+
+ class TInterconnectMetrics: public IInterconnectMetrics {
+ public:
+ struct TOutputChannel {
+ NMonitoring::IRate* Traffic;
+ NMonitoring::IRate* Events;
+ NMonitoring::IRate* OutgoingTraffic;
+ NMonitoring::IRate* OutgoingEvents;
+
+ TOutputChannel() = default;
+
+ TOutputChannel(const std::shared_ptr<NMonitoring::IMetricRegistry>& metrics,
+ NMonitoring::IRate* traffic,
+ NMonitoring::IRate* events)
+ : Traffic(traffic)
+ , Events(events)
+ , OutgoingTraffic(metrics->Rate(NMonitoring::MakeLabels({{"sensor", "interconnect.outgoing_traffic"}})))
+ , OutgoingEvents(metrics->Rate(NMonitoring::MakeLabels({{"sensor", "interconnect.outgoing_events"}})))
+ {}
+
+ TOutputChannel(const TOutputChannel&) = default;
+ };
+
+ struct TInputChannel {
+ NMonitoring::IRate* Traffic;
+ NMonitoring::IRate* Events;
+ NMonitoring::IRate* ScopeErrors;
+ NMonitoring::IRate* IncomingTraffic;
+ NMonitoring::IRate* IncomingEvents;
+
+ TInputChannel() = default;
+
+ TInputChannel(const std::shared_ptr<NMonitoring::IMetricRegistry>& metrics,
+ NMonitoring::IRate* traffic, NMonitoring::IRate* events,
+ NMonitoring::IRate* scopeErrors)
+ : Traffic(traffic)
+ , Events(events)
+ , ScopeErrors(scopeErrors)
+ , IncomingTraffic(metrics->Rate(NMonitoring::MakeLabels({{"sensor", "interconnect.incoming_traffic"}})))
+ , IncomingEvents(metrics->Rate(NMonitoring::MakeLabels({{"sensor", "interconnect.incoming_events"}})))
+ {}
+
+ TInputChannel(const TInputChannel&) = default;
+ };
+
+ struct TInputChannels : std::unordered_map<ui16, TInputChannel> {
+ TInputChannel OtherInputChannel;
+
+ TInputChannels() = default;
+
+ TInputChannels(const std::shared_ptr<NMonitoring::IMetricRegistry>& metrics,
+ const std::unordered_map<ui16, TString>& names,
+ NMonitoring::IRate* traffic, NMonitoring::IRate* events,
+ NMonitoring::IRate* scopeErrors)
+ : OtherInputChannel(std::make_shared<NMonitoring::TMetricSubRegistry>(
+ NMonitoring::TLabels{{"channel", "other"}}, metrics), traffic, events, scopeErrors)
+ {
+ for (const auto& [id, name] : names) {
+ try_emplace(id, std::make_shared<NMonitoring::TMetricSubRegistry>(NMonitoring::TLabels{{"channel", name}}, metrics),
+ traffic, events, scopeErrors);
+ }
+ }
+
+ TInputChannels(const TInputChannels&) = default;
+
+ const TInputChannel& Get(ui16 id) const {
+ const auto it = find(id);
+ return it != end() ? it->second : OtherInputChannel;
+ }
+ };
+
+ TInterconnectMetrics(const TInterconnectProxyCommon::TPtr& common)
+ : Common(common)
+ , MergePerDataCenterMetrics_(common->Settings.MergePerDataCenterCounters)
+ , MergePerPeerMetrics_(common->Settings.MergePerPeerCounters)
+ , Metrics_(common->Metrics)
+ , AdaptiveMetrics_(MergePerDataCenterMetrics_
+ ? PerDataCenterMetrics_ :
+ MergePerPeerMetrics_ ? Metrics_ : PerSessionMetrics_)
+ {}
+
+ void AddInflightDataAmount(ui64 value) override {
+ InflightDataAmount_->Add(value);
+ }
+
+ void SubInflightDataAmount(ui64 value) override {
+ InflightDataAmount_->Add(-value);
+ }
+
+ void AddTotalBytesWritten(ui64 value) override {
+ TotalBytesWritten_->Add(value);
+ }
+
+ void SetClockSkewMicrosec(i64 value) override {
+ ClockSkewMicrosec_->Set(value);
+ }
+
+ void IncSessionDeaths() override {
+ SessionDeaths_->Inc();
+ }
+
+ void IncHandshakeFails() override {
+ HandshakeFails_->Inc();
+ }
+
+ void SetConnected(ui32 value) override {
+ Connected_->Set(value);
+ }
+
+ void IncSubscribersCount() override {
+ SubscribersCount_->Inc();
+ }
+
+ void SubSubscribersCount(ui32 value) override {
+ SubscribersCount_->Add(-value);
+ }
+
+ void SubOutputBuffersTotalSize(ui64 value) override {
+ OutputBuffersTotalSize_->Add(-value);
+ }
+
+ void AddOutputBuffersTotalSize(ui64 value) override {
+ OutputBuffersTotalSize_->Add(value);
+ }
+
+ ui64 GetOutputBuffersTotalSize() const override {
+ return OutputBuffersTotalSize_->Get();
+ }
+
+ void IncDisconnections() override {
+ Disconnections_->Inc();
+ }
+
+ void IncUsefulWriteWakeups() override {
+ UsefulWriteWakeups_->Inc();
+ }
+
+ void IncSpuriousWriteWakeups() override {
+ SpuriousWriteWakeups_->Inc();
+ }
+
+ void IncSendSyscalls() override {
+ SendSyscalls_->Inc();
+ }
+
+ void IncInflyLimitReach() override {
+ InflyLimitReach_->Inc();
+ }
+
+ void IncUsefulReadWakeups() override {
+ UsefulReadWakeups_->Inc();
+ }
+
+ void IncSpuriousReadWakeups() override {
+ SpuriousReadWakeups_->Inc();
+ }
+
+ void IncDisconnectByReason(const TString& s) override {
+ if (auto it = DisconnectByReason_.find(s); it != DisconnectByReason_.end()) {
+ it->second->Inc();
+ }
+ }
+
+ void AddInputChannelsIncomingTraffic(ui16 channel, ui64 incomingTraffic) override {
+ auto& ch = InputChannels_.Get(channel);
+ ch.IncomingTraffic->Add(incomingTraffic);
+ }
+
+ void IncInputChannelsIncomingEvents(ui16 channel) override {
+ auto& ch = InputChannels_.Get(channel);
+ ch.IncomingEvents->Inc();
+ }
+
+ void IncRecvSyscalls() override {
+ RecvSyscalls_->Inc();
+ }
+
+ void AddTotalBytesRead(ui64 value) override {
+ TotalBytesRead_->Add(value);
+ }
+
+ void UpdateLegacyPingTimeHist(ui64 value) override {
+ PingTimeHistogram_->Record(value);
+ }
+
+ void UpdateOutputChannelTraffic(ui16 channel, ui64 value) override {
+ if (GetOutputChannel(channel).OutgoingTraffic) {
+ GetOutputChannel(channel).OutgoingTraffic->Add(value);
+ }
+ if (GetOutputChannel(channel).Traffic) {
+ GetOutputChannel(channel).Traffic->Add(value);
+ }
+ }
+
+ void UpdateOutputChannelEvents(ui16 channel) override {
+ if (GetOutputChannel(channel).OutgoingEvents) {
+ GetOutputChannel(channel).OutgoingEvents->Inc();
+ }
+ if (GetOutputChannel(channel).Events) {
+ GetOutputChannel(channel).Events->Inc();
+ }
+ }
+
+ void SetPeerInfo(const TString& name, const TString& dataCenterId) override {
+ if (name != std::exchange(HumanFriendlyPeerHostName, name)) {
+ PerSessionMetrics_.reset();
+ }
+ VALGRIND_MAKE_READABLE(&DataCenterId, sizeof(DataCenterId));
+ if (dataCenterId != std::exchange(DataCenterId, dataCenterId)) {
+ PerDataCenterMetrics_.reset();
+ }
+
+ const bool updatePerDataCenter = !PerDataCenterMetrics_ && MergePerDataCenterMetrics_;
+ if (updatePerDataCenter) {
+ PerDataCenterMetrics_ = std::make_shared<NMonitoring::TMetricSubRegistry>(
+ NMonitoring::TLabels{{"datacenter_id", *DataCenterId}}, Metrics_);
+ }
+
+ const bool updatePerSession = !PerSessionMetrics_ || updatePerDataCenter;
+ if (updatePerSession) {
+ auto base = MergePerDataCenterMetrics_ ? PerDataCenterMetrics_ : Metrics_;
+ PerSessionMetrics_ = std::make_shared<NMonitoring::TMetricSubRegistry>(
+ NMonitoring::TLabels{{"peer", *HumanFriendlyPeerHostName}}, base);
+ }
+
+ const bool updateGlobal = !Initialized_;
+
+ const bool updateAdaptive =
+ &AdaptiveMetrics_ == &Metrics_ ? updateGlobal :
+ &AdaptiveMetrics_ == &PerSessionMetrics_ ? updatePerSession :
+ &AdaptiveMetrics_ == &PerDataCenterMetrics_ ? updatePerDataCenter :
+ false;
+
+ auto createRate = [](std::shared_ptr<NMonitoring::IMetricRegistry> metrics, TStringBuf name) mutable {
+ return metrics->Rate(NMonitoring::MakeLabels(NMonitoring::TLabels{{"sensor", name}}));
+ };
+ auto createIntGauge = [](std::shared_ptr<NMonitoring::IMetricRegistry> metrics, TStringBuf name) mutable {
+ return metrics->IntGauge(NMonitoring::MakeLabels(NMonitoring::TLabels{{"sensor", name}}));
+ };
+
+ if (updatePerSession) {
+ Connected_ = createIntGauge(PerSessionMetrics_, "interconnect.connected");
+ Disconnections_ = createRate(PerSessionMetrics_, "interconnect.disconnections");
+ ClockSkewMicrosec_ = createIntGauge(PerSessionMetrics_, "interconnect.clock_skew_microsec");
+ Traffic_ = createRate(PerSessionMetrics_, "interconnect.traffic");
+ Events_ = createRate(PerSessionMetrics_, "interconnect.events");
+ ScopeErrors_ = createRate(PerSessionMetrics_, "interconnect.scope_errors");
+
+ for (const auto& [id, name] : Common->ChannelName) {
+ OutputChannels_.try_emplace(id, std::make_shared<NMonitoring::TMetricSubRegistry>(
+ NMonitoring::TLabels{{"channel", name}}, Metrics_), Traffic_, Events_);
+ }
+ OtherOutputChannel_ = TOutputChannel(std::make_shared<NMonitoring::TMetricSubRegistry>(
+ NMonitoring::TLabels{{"channel", "other"}}, Metrics_), Traffic_, Events_);
+
+ InputChannels_ = TInputChannels(Metrics_, Common->ChannelName, Traffic_, Events_, ScopeErrors_);
+ }
+
+ if (updateAdaptive) {
+ SessionDeaths_ = createRate(AdaptiveMetrics_, "interconnect.session_deaths");
+ HandshakeFails_ = createRate(AdaptiveMetrics_, "interconnect.handshake_fails");
+ InflyLimitReach_ = createRate(AdaptiveMetrics_, "interconnect.infly_limit_reach");
+ InflightDataAmount_ = createRate(AdaptiveMetrics_, "interconnect.inflight_data");
+ PingTimeHistogram_ = AdaptiveMetrics_->HistogramRate(
+ NMonitoring::MakeLabels({{"sensor", "interconnect.ping_time_us"}}), NMonitoring::ExponentialHistogram(18, 2, 125));
+ }
+
+ if (updateGlobal) {
+ OutputBuffersTotalSize_ = createRate(Metrics_, "interconnect.output_buffers_total_size");
+ SendSyscalls_ = createRate(Metrics_, "interconnect.send_syscalls");
+ RecvSyscalls_ = createRate(Metrics_, "interconnect.recv_syscalls");
+ SpuriousReadWakeups_ = createRate(Metrics_, "interconnect.spurious_read_wakeups");
+ UsefulReadWakeups_ = createRate(Metrics_, "interconnect.useful_read_wakeups");
+ SpuriousWriteWakeups_ = createRate(Metrics_, "interconnect.spurious_write_wakeups");
+ UsefulWriteWakeups_ = createRate(Metrics_, "interconnect.useful_write_wakeups");
+ SubscribersCount_ = createIntGauge(AdaptiveMetrics_, "interconnect.subscribers_count");
+ TotalBytesWritten_ = createRate(Metrics_, "interconnect.total_bytes_written");
+ TotalBytesRead_ = createRate(Metrics_, "interconnect.total_bytes_read");
+
+ for (const char *reason : TDisconnectReason::Reasons) {
+ DisconnectByReason_[reason] = Metrics_->Rate(
+ NMonitoring::MakeLabels({
+ {"sensor", "interconnect.disconnect_reason"},
+ {"reason", reason},
+ }));
+ }
+ }
+
+ Initialized_ = true;
+ }
+
+ TOutputChannel GetOutputChannel(ui16 index) const {
+ Y_VERIFY(Initialized_);
+ const auto it = OutputChannels_.find(index);
+ return it != OutputChannels_.end() ? it->second : OtherOutputChannel_;
+ }
+
+ private:
+ const TInterconnectProxyCommon::TPtr Common;
+ const bool MergePerDataCenterMetrics_;
+ const bool MergePerPeerMetrics_;
+ std::shared_ptr<NMonitoring::IMetricRegistry> Metrics_;
+ std::shared_ptr<NMonitoring::IMetricRegistry> PerSessionMetrics_;
+ std::shared_ptr<NMonitoring::IMetricRegistry> PerDataCenterMetrics_;
+ std::shared_ptr<NMonitoring::IMetricRegistry>& AdaptiveMetrics_;
+ bool Initialized_ = false;
+
+ NMonitoring::IRate* Traffic_;
+
+ NMonitoring::IRate* Events_;
+ NMonitoring::IRate* ScopeErrors_;
+ NMonitoring::IRate* Disconnections_;
+ NMonitoring::IIntGauge* Connected_;
+
+ NMonitoring::IRate* SessionDeaths_;
+ NMonitoring::IRate* HandshakeFails_;
+ NMonitoring::IRate* InflyLimitReach_;
+ NMonitoring::IRate* InflightDataAmount_;
+ NMonitoring::IRate* OutputBuffersTotalSize_;
+ NMonitoring::IIntGauge* SubscribersCount_;
+ NMonitoring::IRate* SendSyscalls_;
+ NMonitoring::IRate* RecvSyscalls_;
+ NMonitoring::IRate* SpuriousWriteWakeups_;
+ NMonitoring::IRate* UsefulWriteWakeups_;
+ NMonitoring::IRate* SpuriousReadWakeups_;
+ NMonitoring::IRate* UsefulReadWakeups_;
+ NMonitoring::IIntGauge* ClockSkewMicrosec_;
+
+ NMonitoring::IHistogram* PingTimeHistogram_;
+
+ std::unordered_map<ui16, TOutputChannel> OutputChannels_;
+ TOutputChannel OtherOutputChannel_;
+ TInputChannels InputChannels_;
+
+ THashMap<TString, NMonitoring::IRate*> DisconnectByReason_;
+
+ NMonitoring::IRate* TotalBytesWritten_;
+ NMonitoring::IRate* TotalBytesRead_;
+ };
+
+} // namespace
+
+std::unique_ptr<IInterconnectMetrics> CreateInterconnectCounters(const TInterconnectProxyCommon::TPtr& common) {
+ return std::make_unique<TInterconnectCounters>(common);
+}
+
+std::unique_ptr<IInterconnectMetrics> CreateInterconnectMetrics(const TInterconnectProxyCommon::TPtr& common) {
+ return std::make_unique<TInterconnectMetrics>(common);
+}
+
+} // NActors
diff --git a/library/cpp/actors/interconnect/interconnect_counters.h b/library/cpp/actors/interconnect/interconnect_counters.h
new file mode 100644
index 0000000000..e30f03a0bc
--- /dev/null
+++ b/library/cpp/actors/interconnect/interconnect_counters.h
@@ -0,0 +1,59 @@
+#pragma once
+
+#include <library/cpp/actors/helpers/mon_histogram_helper.h>
+
+#include <util/system/valgrind.h>
+
+#include "types.h"
+
+#include "interconnect_common.h"
+
+#include <memory>
+#include <optional>
+
+namespace NActors {
+
+class IInterconnectMetrics {
+public:
+ virtual ~IInterconnectMetrics() = default;
+
+ virtual void AddInflightDataAmount(ui64 value) = 0;
+ virtual void SubInflightDataAmount(ui64 value) = 0;
+ virtual void AddTotalBytesWritten(ui64 value) = 0;
+ virtual void SetClockSkewMicrosec(i64 value) = 0;
+ virtual void IncSessionDeaths() = 0;
+ virtual void IncHandshakeFails() = 0;
+ virtual void SetConnected(ui32 value) = 0;
+ virtual void IncSubscribersCount() = 0;
+ virtual void SubSubscribersCount(ui32 value) = 0;
+ virtual void SubOutputBuffersTotalSize(ui64 value) = 0;
+ virtual void AddOutputBuffersTotalSize(ui64 value) = 0;
+ virtual ui64 GetOutputBuffersTotalSize() const = 0;
+ virtual void IncDisconnections() = 0;
+ virtual void IncUsefulWriteWakeups() = 0;
+ virtual void IncSpuriousWriteWakeups() = 0;
+ virtual void IncSendSyscalls() = 0;
+ virtual void IncInflyLimitReach() = 0;
+ virtual void IncDisconnectByReason(const TString& s) = 0;
+ virtual void IncUsefulReadWakeups() = 0;
+ virtual void IncSpuriousReadWakeups() = 0;
+ virtual void SetPeerInfo(const TString& name, const TString& dataCenterId) = 0;
+ virtual void AddInputChannelsIncomingTraffic(ui16 channel, ui64 incomingTraffic) = 0;
+ virtual void IncInputChannelsIncomingEvents(ui16 channel) = 0;
+ virtual void IncRecvSyscalls() = 0;
+ virtual void AddTotalBytesRead(ui64 value) = 0;
+ virtual void UpdateLegacyPingTimeHist(ui64 value) = 0;
+ virtual void UpdateOutputChannelTraffic(ui16 channel, ui64 value) = 0;
+ virtual void UpdateOutputChannelEvents(ui16 channel) = 0;
+ TString GetHumanFriendlyPeerHostName() const {
+ return HumanFriendlyPeerHostName.value_or(TString());
+ }
+
+protected:
+ std::optional<TString> DataCenterId;
+ std::optional<TString> HumanFriendlyPeerHostName;
+};
+
+std::unique_ptr<IInterconnectMetrics> CreateInterconnectCounters(const NActors::TInterconnectProxyCommon::TPtr& common);
+std::unique_ptr<IInterconnectMetrics> CreateInterconnectMetrics(const NActors::TInterconnectProxyCommon::TPtr& common);
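+
+// Editor's note: a selection sketch, not part of the original change. The assumption (for
+// illustration only) is that a process prefers the IMetricRegistry-based implementation when
+// TInterconnectProxyCommon::Metrics is set, and falls back to dynamic counters otherwise:
+//
+//     std::unique_ptr<IInterconnectMetrics> metrics = common->Metrics
+//         ? CreateInterconnectMetrics(common)
+//         : CreateInterconnectCounters(common);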
+} // NActors
diff --git a/library/cpp/actors/interconnect/interconnect_handshake.cpp b/library/cpp/actors/interconnect/interconnect_handshake.cpp
new file mode 100644
index 0000000000..9ede998d8e
--- /dev/null
+++ b/library/cpp/actors/interconnect/interconnect_handshake.cpp
@@ -0,0 +1,995 @@
+#include "interconnect_handshake.h"
+#include "interconnect_tcp_proxy.h"
+
+#include <library/cpp/actors/core/actor_coroutine.h>
+#include <library/cpp/actors/core/log.h>
+#include <library/cpp/actors/protos/services_common.pb.h>
+#include <util/system/getpid.h>
+
+#include <google/protobuf/text_format.h>
+
+#include <variant>
+
+namespace NActors {
+    static constexpr size_t StackSize = 64 * 1024; // 64 KB of coroutine stack should suffice for the handshake
+
+ class THandshakeActor
+ : public TActorCoroImpl
+ , public TInterconnectLoggingBase
+ {
+ struct TExHandshakeFailed : yexception {};
+
+ static constexpr TDuration ResolveTimeout = TDuration::Seconds(1);
+
+#pragma pack(push, 1)
+
+ struct TInitialPacket {
+ struct {
+ TActorId SelfVirtualId;
+ TActorId PeerVirtualId;
+ ui64 NextPacket;
+ ui64 Version;
+ } Header;
+ ui32 Checksum;
+
+ TInitialPacket() = default;
+
+ TInitialPacket(const TActorId& self, const TActorId& peer, ui64 nextPacket, ui64 version) {
+ Header.SelfVirtualId = self;
+ Header.PeerVirtualId = peer;
+ Header.NextPacket = nextPacket;
+ Header.Version = version;
+ Checksum = Crc32cExtendMSanCompatible(0, &Header, sizeof(Header));
+ }
+
+ bool Check() const {
+ return Checksum == Crc32cExtendMSanCompatible(0, &Header, sizeof(Header));
+ }
+
+ TString ToString() const {
+ return TStringBuilder()
+ << "{SelfVirtualId# " << Header.SelfVirtualId.ToString()
+ << " PeerVirtualId# " << Header.PeerVirtualId.ToString()
+ << " NextPacket# " << Header.NextPacket
+ << " Version# " << Header.Version
+ << "}";
+ }
+ };
+
+ struct TExHeader {
+ static constexpr ui32 MaxSize = 1024 * 1024;
+
+ ui32 Checksum;
+ ui32 Size;
+
+ ui32 CalculateChecksum(const void* data, size_t len) const {
+ return Crc32cExtendMSanCompatible(Crc32cExtendMSanCompatible(0, &Size, sizeof(Size)), data, len);
+ }
+
+ void Sign(const void* data, size_t len) {
+ Checksum = CalculateChecksum(data, len);
+ }
+
+ bool Check(const void* data, size_t len) const {
+ return Checksum == CalculateChecksum(data, len);
+ }
+ };
+
+#pragma pack(pop)
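+ // Handshake wire format (defined by the packed structs above): each side first exchanges a
+ // fixed-size TInitialPacket whose header carries the virtual actor ids, the next expected
+ // packet serial and the protocol version, guarded by a CRC32C checksum. All further
+ // negotiation (THandshakeRequest/THandshakeReply protobufs) travels in extended blocks:
+ // a TExHeader carrying size and CRC32C, followed by a payload of at most MaxSize bytes.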
+
+ private:
+ TInterconnectProxyCommon::TPtr Common;
+ TActorId SelfVirtualId;
+ TActorId PeerVirtualId;
+ ui32 PeerNodeId = 0;
+ ui64 NextPacketToPeer = 0;
+ TMaybe<ui64> NextPacketFromPeer; // will be obtained from incoming initial packet
+ TString PeerHostName;
+ TString PeerAddr;
+ TSocketPtr Socket;
+ TPollerToken::TPtr PollerToken;
+ TString State;
+ TString HandshakeKind;
+ TMaybe<THolder<TProgramInfo>> ProgramInfo; // set upon successful handshake, even if the held pointer itself is null
+ TSessionParams Params;
+ bool ResolveTimedOut = false;
+ THashMap<ui32, TInstant> LastLogNotice;
+ const TDuration MuteDuration = TDuration::Seconds(15);
+ TInstant Deadline;
+
+ public:
+ static constexpr IActor::EActivityType ActorActivityType() {
+ return IActor::INTERCONNECT_HANDSHAKE;
+ }
+
+ THandshakeActor(TInterconnectProxyCommon::TPtr common, const TActorId& self, const TActorId& peer,
+ ui32 nodeId, ui64 nextPacket, TString peerHostName, TSessionParams params)
+ : TActorCoroImpl(StackSize, true, true) // allow unhandled poison pills and dtors
+ , Common(std::move(common))
+ , SelfVirtualId(self)
+ , PeerVirtualId(peer)
+ , PeerNodeId(nodeId)
+ , NextPacketToPeer(nextPacket)
+ , PeerHostName(std::move(peerHostName))
+ , HandshakeKind("outgoing handshake")
+ , Params(std::move(params))
+ {
+ Y_VERIFY(SelfVirtualId);
+ Y_VERIFY(SelfVirtualId.NodeId());
+ Y_VERIFY(PeerNodeId);
+ }
+
+ THandshakeActor(TInterconnectProxyCommon::TPtr common, TSocketPtr socket)
+ : TActorCoroImpl(StackSize, true, true) // allow unhandled poison pills and dtors
+ , Common(std::move(common))
+ , Socket(std::move(socket))
+ , HandshakeKind("incoming handshake")
+ {
+ Y_VERIFY(Socket);
+ PeerAddr = TString::Uninitialized(1024);
+ if (GetRemoteAddr(*Socket, PeerAddr.Detach(), PeerAddr.size())) {
+ PeerAddr.resize(strlen(PeerAddr.data()));
+ } else {
+ PeerAddr.clear();
+ }
+ }
+
+ void UpdatePrefix() {
+ SetPrefix(Sprintf("Handshake %s [node %" PRIu32 "]", SelfActorId.ToString().data(), PeerNodeId));
+ }
+
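+ // Overall flow of Run(): arm the handshake deadline timer, perform the incoming or outgoing
+ // leg depending on whether a socket was supplied at construction, optionally upgrade the
+ // connection to TLS (or verify that plaintext is acceptable), and on success hand the socket
+ // over to the proxy via TEvHandshakeDone; failures are raised as TExHandshakeFailed.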
+ void Run() override {
+ UpdatePrefix();
+
+ // set up overall handshake process timer
+ TDuration timeout = Common->Settings.Handshake;
+ if (timeout == TDuration::Zero()) {
+ timeout = DEFAULT_HANDSHAKE_TIMEOUT;
+ }
+ timeout += ResolveTimeout * 2;
+ Deadline = Now() + timeout;
+ Schedule(Deadline, new TEvents::TEvWakeup);
+
+ try {
+ if (Socket) {
+ PerformIncomingHandshake();
+ } else {
+ PerformOutgoingHandshake();
+ }
+
+ // establish the encrypted channel or, if encryption is disabled, check that this matches the settings
+ if (ProgramInfo) {
+ if (Params.Encryption) {
+ EstablishSecureConnection();
+ } else if (Common->Settings.EncryptionMode == EEncryptionMode::REQUIRED && !Params.AuthOnly) {
+ Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, "Peer doesn't support encryption, which is required");
+ }
+ }
+ } catch (const TExHandshakeFailed&) {
+ ProgramInfo.Clear();
+ }
+
+ if (ProgramInfo) {
+ LOG_LOG_IC_X(NActorsServices::INTERCONNECT, "ICH04", NLog::PRI_INFO, "handshake succeeded");
+ Y_VERIFY(NextPacketFromPeer);
+ if (PollerToken) {
+ Y_VERIFY(PollerToken->RefCount() == 1);
+ PollerToken.Reset(); // ensure we are going to destroy poller token here as we will re-register the socket within other actor
+ }
+ SendToProxy(MakeHolder<TEvHandshakeDone>(std::move(Socket), PeerVirtualId, SelfVirtualId,
+ *NextPacketFromPeer, ProgramInfo->Release(), std::move(Params)));
+ }
+
+ Socket.Reset();
+ }
+
+ void EstablishSecureConnection() {
+ Y_VERIFY(PollerToken && PollerToken->RefCount() == 1);
+ PollerToken.Reset();
+ auto ev = AskProxy<TEvSecureSocket>(MakeHolder<TEvGetSecureSocket>(Socket), "AskProxy(TEvSecureContext)");
+ Socket = std::move(ev->Get()->Socket);
+ RegisterInPoller();
+ const ui32 myNodeId = GetActorSystem()->NodeId;
+ const bool server = myNodeId < PeerNodeId; // keep server/client role permanent to enable easy TLS session resuming
+ for (;;) {
+ TString err;
+ auto& secure = static_cast<NInterconnect::TSecureSocket&>(*Socket);
+ switch (secure.Establish(server, Params.AuthOnly, err)) {
+ case NInterconnect::TSecureSocket::EStatus::SUCCESS:
+ if (Params.AuthOnly) {
+ Params.Encryption = false;
+ Params.AuthCN = secure.GetPeerCommonName();
+ Y_VERIFY(PollerToken && PollerToken->RefCount() == 1);
+ PollerToken.Reset();
+ Socket = secure.Detach();
+ }
+ return;
+
+ case NInterconnect::TSecureSocket::EStatus::ERROR:
+ Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, err, true);
+ [[fallthrough]];
+
+ case NInterconnect::TSecureSocket::EStatus::WANT_READ:
+ WaitPoller(true, false, "ReadEstablish");
+ break;
+
+ case NInterconnect::TSecureSocket::EStatus::WANT_WRITE:
+ WaitPoller(false, true, "WriteEstablish");
+ break;
+ }
+ }
+ }
+
+ void ProcessUnexpectedEvent(TAutoPtr<IEventHandle> ev) override {
+ switch (const ui32 type = ev->GetTypeRewrite()) {
+ case TEvents::TSystem::Wakeup:
+ Fail(TEvHandshakeFail::HANDSHAKE_FAIL_TRANSIENT, Sprintf("Handshake timed out, State# %s", State.data()), true);
+ [[fallthrough]];
+
+ case ui32(ENetwork::NodeInfo):
+ case TEvInterconnect::EvNodeAddress:
+ case ui32(ENetwork::ResolveError):
+ break; // most likely a race with resolve timeout
+
+ case TEvPollerReady::EventType:
+ break;
+
+ default:
+ Y_FAIL("unexpected event 0x%08" PRIx32, type);
+ }
+ }
+
+ template<typename T>
+ void SetupVersionTag(T& proto) {
+ if (Common->VersionInfo) {
+ proto.SetVersionTag(Common->VersionInfo->Tag);
+ for (const TString& accepted : Common->VersionInfo->AcceptedTags) {
+ proto.AddAcceptedVersionTags(accepted);
+ }
+ }
+ }
+
+ template<typename T>
+ void SetupClusterUUID(T& proto) {
+ auto *pb = proto.MutableClusterUUIDs();
+ pb->SetClusterUUID(Common->ClusterUUID);
+ for (const TString& uuid : Common->AcceptUUID) {
+ pb->AddAcceptUUID(uuid);
+ }
+ }
+
+ template<typename T, typename TCallback>
+ void ValidateVersionTag(const T& proto, TCallback&& errorCallback) {
+ // check whether we accept the peer's version tag (if the peer provides one and our accepted list is non-empty)
+ if (Common->VersionInfo) {
+ if (!proto.HasVersionTag()) {
+ LOG_LOG_IC_X(NActorsServices::INTERCONNECT, "ICH06", NLog::PRI_WARN,
+ "peer did not report VersionTag, accepting by default");
+ } else if (!Common->VersionInfo->AcceptedTags.count(proto.GetVersionTag())) {
+ // we will not accept peer's tag, so check if remote peer would accept our version tag
+ size_t i;
+ for (i = 0; i < proto.AcceptedVersionTagsSize() && Common->VersionInfo->Tag != proto.GetAcceptedVersionTags(i); ++i)
+ {}
+ if (i == proto.AcceptedVersionTagsSize()) {
+ // the peer will not accept our version either -- this is a total failure
+ TStringStream s("local/peer version tags did not match accepted ones");
+ s << " local Tag# " << Common->VersionInfo->Tag << " accepted Tags# [";
+ bool first = true;
+ for (const auto& tag : Common->VersionInfo->AcceptedTags) {
+ s << (std::exchange(first, false) ? "" : " ") << tag;
+ }
+ s << "] peer Tag# " << proto.GetVersionTag() << " accepted Tags# [";
+ first = true;
+ for (const auto& tag : proto.GetAcceptedVersionTags()) {
+ s << (std::exchange(first, false) ? "" : " ") << tag;
+ }
+ s << "]";
+ errorCallback(s.Str());
+ }
+ }
+ }
+ }
+
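+ // Cluster UUID acceptance is symmetric: it suffices that our ClusterUUID is on the peer's
+ // accept list *or* that the peer's ClusterUUID is on ours. A hypothetical example: local
+ // UUID "A" with AcceptUUID ["B"], peer UUID "B" with AcceptUUID ["C"] -- the peer's "B"
+ // matches our accept list, so validation passes even though "A" is unknown to the peer.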
+ template<typename T, typename TCallback>
+ void ValidateClusterUUID(const T& proto, TCallback&& errorCallback, const TMaybe<TString>& uuid = {}) {
+ auto formatList = [](const auto& list) {
+ TStringStream s;
+ s << "[";
+ for (auto it = list.begin(); it != list.end(); ++it) {
+ if (it != list.begin()) {
+ s << " ";
+ }
+ s << *it;
+ }
+ s << "]";
+ return s.Str();
+ };
+ if (!Common->AcceptUUID) {
+ return; // promiscuous mode -- we accept any peer
+ }
+ if (!proto.HasClusterUUIDs()) {
+ if (uuid) {
+ // old-style check; the peer does not support the symmetric protocol
+ bool matching = false;
+ for (const TString& accepted : Common->AcceptUUID) {
+ if (*uuid == accepted) {
+ matching = true;
+ break;
+ }
+ }
+ if (!matching) {
+ errorCallback(Sprintf("Peer ClusterUUID# %s mismatch, AcceptUUID# %s", uuid->data(), formatList(Common->AcceptUUID).data()));
+ }
+ }
+ return; // remote side did not fill in this field -- old version, symmetric protocol is not supported
+ }
+
+ const auto& uuids = proto.GetClusterUUIDs();
+
+ // check if our UUID matches remote accept list
+ for (const TString& item : uuids.GetAcceptUUID()) {
+ if (item == Common->ClusterUUID) {
+ return; // match
+ }
+ }
+
+ // check if remote UUID matches our accept list
+ const TString& remoteUUID = uuids.GetClusterUUID();
+ for (const TString& item : Common->AcceptUUID) {
+ if (item == remoteUUID) {
+ return; // match
+ }
+ }
+
+ // no match
+ errorCallback(Sprintf("Peer ClusterUUID# %s mismatch, AcceptUUID# %s", remoteUUID.data(), formatList(Common->AcceptUUID).data()));
+ }
+
+ void ParsePeerScopeId(const NActorsInterconnect::TScopeId& proto) {
+ Params.PeerScopeId = {proto.GetX1(), proto.GetX2()};
+ }
+
+ void FillInScopeId(NActorsInterconnect::TScopeId& proto) {
+ const TScopeId& scope = Common->LocalScopeId;
+ proto.SetX1(scope.first);
+ proto.SetX2(scope.second);
+ }
+
+ template<typename T>
+ void ReportProto(const T& protobuf, const char *msg) {
+ auto formatString = [&] {
+ google::protobuf::TextFormat::Printer p;
+ p.SetSingleLineMode(true);
+ TString s;
+ p.PrintToString(protobuf, &s);
+ return s;
+ };
+ LOG_LOG_IC_X(NActorsServices::INTERCONNECT, "ICH07", NLog::PRI_DEBUG, "%s %s", msg,
+ formatString().data());
+ }
+
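+ // Connectivity self-check: when an incoming handshake carries a cookie, we dial the peer
+ // back over a temporary connection and ask it (via DoCheckCookie) to confirm that cookie,
+ // which guards against stale or misrouted peers; the current socket and poller token are
+ // stashed for the duration of the probe and restored afterwards.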
+ bool CheckPeerCookie(const TString& cookie, TString *error) {
+ // create a temporary socket to connect to the peer
+ TSocketPtr tempSocket;
+ std::swap(tempSocket, Socket);
+ TPollerToken::TPtr tempPollerToken;
+ std::swap(tempPollerToken, PollerToken);
+
+ // set up virtual self id to ensure peer will not drop our connection
+ char buf[12] = {'c', 'o', 'o', 'k', 'i', 'e', ' ', 'c', 'h', 'e', 'c', 'k'};
+ SelfVirtualId = TActorId(SelfActorId.NodeId(), TStringBuf(buf, 12));
+
+ bool success = true;
+ try {
+ // issue connection and send initial packet
+ Connect(false);
+ SendInitialPacket();
+
+ // wait for basic response
+ TInitialPacket response;
+ ReceiveData(&response, sizeof(response), "ReceiveResponse");
+ if (!response.Check()) {
+ Fail(TEvHandshakeFail::HANDSHAKE_FAIL_TRANSIENT, "Initial packet CRC error");
+ } else if (response.Header.Version != INTERCONNECT_PROTOCOL_VERSION) {
+ Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, Sprintf("Incompatible protocol %" PRIu64, response.Header.Version));
+ }
+
+ // issue cookie check request
+ NActorsInterconnect::THandshakeRequest request;
+ request.SetProtocol(INTERCONNECT_PROTOCOL_VERSION);
+ request.SetProgramPID(0);
+ request.SetProgramStartTime(0);
+ request.SetSerial(0);
+ request.SetReceiverNodeId(0);
+ request.SetSenderActorId(TString());
+ request.SetCookie(cookie);
+ request.SetDoCheckCookie(true);
+ SendExBlock(request, "SendExBlockDoCheckCookie");
+
+ // process cookie check reply
+ NActorsInterconnect::THandshakeReply reply;
+ if (!reply.ParseFromString(ReceiveExBlock("ReceiveExBlockDoCheckCookie"))) {
+ Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, "Incorrect packet from peer");
+ } else if (reply.HasCookieCheckResult() && !reply.GetCookieCheckResult()) {
+ Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, "Cookie check error -- possible network problem");
+ }
+ } catch (const TExHandshakeFailed& e) {
+ *error = e.what();
+ success = false;
+ }
+
+ // restore state
+ SelfVirtualId = TActorId();
+ std::swap(tempSocket, Socket);
+ std::swap(tempPollerToken, PollerToken);
+ return success;
+ }
+
+ void PerformOutgoingHandshake() {
+ LOG_LOG_IC_X(NActorsServices::INTERCONNECT, "ICH01", NLog::PRI_DEBUG,
+ "starting outgoing handshake");
+
+ // perform connection
+ Connect(true);
+
+ // send initial request packet
+ SendInitialPacket();
+
+ TInitialPacket response;
+ ReceiveData(&response, sizeof(response), "ReceiveResponse");
+ if (!response.Check()) {
+ Fail(TEvHandshakeFail::HANDSHAKE_FAIL_TRANSIENT, "Initial packet CRC error");
+ } else if (response.Header.Version != INTERCONNECT_PROTOCOL_VERSION) {
+ Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, Sprintf("Incompatible protocol %" PRIu64, response.Header.Version));
+ }
+
+ // extract next packet
+ NextPacketFromPeer = response.Header.NextPacket;
+
+ if (!PeerVirtualId) {
+ // creating new session -- we have to generate request
+ NActorsInterconnect::THandshakeRequest request;
+
+ request.SetProtocol(INTERCONNECT_PROTOCOL_VERSION);
+ request.SetProgramPID(GetPID());
+ request.SetProgramStartTime(Common->StartTime);
+ request.SetSerial(SelfVirtualId.LocalId());
+ request.SetReceiverNodeId(PeerNodeId);
+ request.SetSenderActorId(SelfVirtualId.ToString());
+ request.SetSenderHostName(Common->TechnicalSelfHostName);
+ request.SetReceiverHostName(PeerHostName);
+
+ if (Common->LocalScopeId != TScopeId()) {
+ FillInScopeId(*request.MutableClientScopeId());
+ }
+
+ if (Common->Cookie) {
+ request.SetCookie(Common->Cookie);
+ }
+ if (Common->ClusterUUID) {
+ request.SetUUID(Common->ClusterUUID);
+ }
+ SetupClusterUUID(request);
+ SetupVersionTag(request);
+
+ if (const ui32 size = Common->HandshakeBallastSize) {
+ TString ballast(size, 0);
+ char* data = ballast.Detach();
+ for (ui32 i = 0; i < size; ++i) {
+ data[i] = i;
+ }
+ request.SetBallast(ballast);
+ }
+
+ switch (Common->Settings.EncryptionMode) {
+ case EEncryptionMode::DISABLED:
+ break;
+
+ case EEncryptionMode::OPTIONAL:
+ request.SetRequireEncryption(false);
+ break;
+
+ case EEncryptionMode::REQUIRED:
+ request.SetRequireEncryption(true);
+ break;
+ }
+
+ request.SetRequestModernFrame(true);
+ request.SetRequestAuthOnly(Common->Settings.TlsAuthOnly);
+
+ SendExBlock(request, "ExRequest");
+
+ NActorsInterconnect::THandshakeReply reply;
+ if (!reply.ParseFromString(ReceiveExBlock("ExReply"))) {
+ Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, "Incorrect THandshakeReply");
+ }
+ ReportProto(reply, "ReceiveExBlock ExReply");
+
+ if (reply.HasErrorExplaination()) {
+ Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, "error from peer: " + reply.GetErrorExplaination());
+ } else if (!reply.HasSuccess()) {
+ Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, "empty reply");
+ }
+
+ auto generateError = [this](TString msg) {
+ Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, msg);
+ };
+
+ const auto& success = reply.GetSuccess();
+ ValidateClusterUUID(success, generateError);
+ ValidateVersionTag(success, generateError);
+
+ const auto& s = success.GetSenderActorId();
+ PeerVirtualId.Parse(s.data(), s.size());
+
+ // recover flags
+ Params.Encryption = success.GetStartEncryption();
+ Params.UseModernFrame = success.GetUseModernFrame();
+ Params.AuthOnly = Params.Encryption && success.GetAuthOnly();
+ if (success.HasServerScopeId()) {
+ ParsePeerScopeId(success.GetServerScopeId());
+ }
+
+ // recover peer process info from peer's reply
+ ProgramInfo = GetProgramInfo(success);
+ } else if (!response.Header.SelfVirtualId) {
+ // peer reported error -- empty ack was generated by proxy for this request
+ Fail(TEvHandshakeFail::HANDSHAKE_FAIL_SESSION_MISMATCH, "Peer rejected session continuation handshake");
+ } else if (response.Header.SelfVirtualId != PeerVirtualId || response.Header.PeerVirtualId != SelfVirtualId) {
+ // resuming existing session; check that virtual ids of peers match each other
+ Fail(TEvHandshakeFail::HANDSHAKE_FAIL_SESSION_MISMATCH, "Session virtual ID mismatch");
+ } else {
+ ProgramInfo.ConstructInPlace(); // successful handshake
+ }
+ }
+
+ void PerformIncomingHandshake() {
+ LOG_LOG_IC_X(NActorsServices::INTERCONNECT, "ICH02", NLog::PRI_DEBUG,
+ "starting incoming handshake");
+
+ // set up incoming socket
+ SetupSocket();
+
+ // wait for initial request packet
+ TInitialPacket request;
+ ReceiveData(&request, sizeof(request), "ReceiveRequest");
+ if (!request.Check()) {
+ Fail(TEvHandshakeFail::HANDSHAKE_FAIL_TRANSIENT, "Initial packet CRC error");
+ } else if (request.Header.Version != INTERCONNECT_PROTOCOL_VERSION) {
+ Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, Sprintf("Incompatible protocol %" PRIu64, request.Header.Version));
+ }
+
+ // extract peer node id from the peer
+ PeerNodeId = request.Header.SelfVirtualId.NodeId();
+ if (!PeerNodeId) {
+ Y_VERIFY_DEBUG(false, "PeerNodeId is zero request# %s", request.ToString().data());
+ Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, "SelfVirtualId.NodeId is empty in initial packet");
+ }
+ UpdatePrefix();
+
+ // extract next packet
+ NextPacketFromPeer = request.Header.NextPacket;
+
+ if (request.Header.PeerVirtualId) {
+ // issue request to the proxy and wait for the response
+ auto reply = AskProxy<TEvHandshakeAck, TEvHandshakeNak>(MakeHolder<TEvHandshakeAsk>(
+ request.Header.SelfVirtualId, request.Header.PeerVirtualId, request.Header.NextPacket),
+ "TEvHandshakeAsk");
+ if (auto *ack = reply->CastAsLocal<TEvHandshakeAck>()) {
+ // extract self/peer virtual ids
+ SelfVirtualId = ack->Self;
+ PeerVirtualId = request.Header.SelfVirtualId;
+ NextPacketToPeer = ack->NextPacket;
+ Params = ack->Params;
+
+ // only succeed when the proxy has returned a valid SelfVirtualId; otherwise it wants us to
+ // terminate the handshake process and does not expect a handshake reply
+ ProgramInfo.ConstructInPlace();
+ } else {
+ LOG_LOG_IC_X(NActorsServices::INTERCONNECT, "ICH08", NLog::PRI_NOTICE,
+ "Continuation request rejected by proxy");
+
+ // report continuation reject to peer
+ SelfVirtualId = TActorId();
+ PeerVirtualId = TActorId();
+ NextPacketToPeer = 0;
+ }
+
+ // issue response to the peer
+ SendInitialPacket();
+ } else {
+ // peer wants a new session, clear fields and send initial packet
+ SelfVirtualId = TActorId();
+ PeerVirtualId = TActorId();
+ NextPacketToPeer = 0;
+ SendInitialPacket();
+
+ // wait for extended request
+ auto ev = MakeHolder<TEvHandshakeRequest>();
+ auto& request = ev->Record;
+ if (!request.ParseFromString(ReceiveExBlock("ExRequest"))) {
+ Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, "Incorrect THandshakeRequest");
+ }
+ ReportProto(request, "ReceiveExBlock ExRequest");
+
+ auto generateError = [this](TString msg) {
+ // issue reply to the peer to prevent repeating connection retries
+ NActorsInterconnect::THandshakeReply reply;
+ reply.SetErrorExplaination(msg);
+ SendExBlock(reply, "ExReply");
+
+ // terminate the handshake
+ Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, msg);
+ };
+
+ // check request cookie
+ TString error;
+ if (request.HasDoCheckCookie()) {
+ NActorsInterconnect::THandshakeReply reply;
+ reply.SetCookieCheckResult(request.GetCookie() == Common->Cookie);
+ SendExBlock(reply, "ExReplyDoCheckCookie");
+ throw TExHandshakeFailed();
+ } else if (request.HasCookie() && !CheckPeerCookie(request.GetCookie(), &error)) {
+ generateError(TStringBuilder() << "Peer connectivity-checking failed, error# " << error);
+ }
+
+ // update log prefix with the reported peer host name
+ PeerHostName = request.GetSenderHostName();
+
+ // parse peer virtual id
+ const auto& str = request.GetSenderActorId();
+ PeerVirtualId.Parse(str.data(), str.size());
+
+ // validate request
+ ValidateClusterUUID(request, generateError, request.GetUUID());
+ if (request.GetReceiverNodeId() != SelfActorId.NodeId()) {
+ generateError(Sprintf("Incorrect ReceiverNodeId# %" PRIu32 " from the peer, expected# %" PRIu32,
+ request.GetReceiverNodeId(), SelfActorId.NodeId()));
+ } else if (request.GetReceiverHostName() != Common->TechnicalSelfHostName) {
+ generateError(Sprintf("ReceiverHostName# %s mismatch, expected# %s", request.GetReceiverHostName().data(),
+ Common->TechnicalSelfHostName.data()));
+ }
+ ValidateVersionTag(request, generateError);
+
+ // check peer node
+ auto peerNodeInfo = GetPeerNodeInfo();
+ if (!peerNodeInfo) {
+ generateError("Peer node not registered in nameservice");
+ } else if (peerNodeInfo->Host != request.GetSenderHostName()) {
+ generateError("SenderHostName mismatch");
+ }
+
+ // check request against encryption
+ switch (Common->Settings.EncryptionMode) {
+ case EEncryptionMode::DISABLED:
+ if (request.GetRequireEncryption()) {
+ generateError("Peer requested encryption, but it is disabled locally");
+ }
+ break;
+
+ case EEncryptionMode::OPTIONAL:
+ Params.Encryption = request.HasRequireEncryption();
+ break;
+
+ case EEncryptionMode::REQUIRED:
+ if (!request.HasRequireEncryption()) {
+ generateError("Peer did not request encryption, but it is required locally");
+ }
+ Params.Encryption = true;
+ break;
+ }
+
+ Params.UseModernFrame = request.GetRequestModernFrame();
+ Params.AuthOnly = Params.Encryption && request.GetRequestAuthOnly() && Common->Settings.TlsAuthOnly;
+
+ if (request.HasClientScopeId()) {
+ ParsePeerScopeId(request.GetClientScopeId());
+ }
+
+ // remember program info (assuming successful handshake)
+ ProgramInfo = GetProgramInfo(request);
+
+ // send to proxy
+ auto reply = AskProxy<TEvHandshakeReplyOK, TEvHandshakeReplyError>(std::move(ev), "TEvHandshakeRequest");
+
+ // parse it
+ if (auto ev = reply->CastAsLocal<TEvHandshakeReplyOK>()) {
+ // issue successful reply to the peer
+ auto& record = ev->Record;
+ Y_VERIFY(record.HasSuccess());
+ auto& success = *record.MutableSuccess();
+ SetupClusterUUID(success);
+ SetupVersionTag(success);
+ success.SetStartEncryption(Params.Encryption);
+ if (Common->LocalScopeId != TScopeId()) {
+ FillInScopeId(*success.MutableServerScopeId());
+ }
+ success.SetUseModernFrame(Params.UseModernFrame);
+ success.SetAuthOnly(Params.AuthOnly);
+ SendExBlock(record, "ExReply");
+
+ // extract sender actor id (self virtual id)
+ const auto& str = success.GetSenderActorId();
+ SelfVirtualId.Parse(str.data(), str.size());
+ } else if (auto ev = reply->CastAsLocal<TEvHandshakeReplyError>()) {
+ // in case of error just send reply to the peer and terminate handshake
+ SendExBlock(ev->Record, "ExReply");
+ ProgramInfo.Clear(); // do not issue reply to the proxy
+ } else {
+ Y_FAIL("unexpected event Type# 0x%08" PRIx32, reply->GetTypeRewrite());
+ }
+ }
+ }
+
+ template <typename T>
+ void SendExBlock(const T& proto, const char* what) {
+ TString data;
+ Y_PROTOBUF_SUPPRESS_NODISCARD proto.SerializeToString(&data);
+ Y_VERIFY(data.size() <= TExHeader::MaxSize);
+
+ ReportProto(proto, Sprintf("SendExBlock %s", what).data());
+
+ TExHeader header;
+ header.Size = data.size();
+ header.Sign(data.data(), data.size());
+ SendData(&header, sizeof(header), Sprintf("Send%sHeader", what));
+ SendData(data.data(), data.size(), Sprintf("Send%sData", what));
+ }
+
+ TString ReceiveExBlock(const char* what) {
+ TExHeader header;
+ ReceiveData(&header, sizeof(header), Sprintf("Receive%sHeader", what));
+ if (header.Size > TExHeader::MaxSize) {
+ Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, "Incorrect extended header size");
+ }
+
+ TString data;
+ data.resize(header.Size);
+ ReceiveData(data.Detach(), data.size(), Sprintf("Receive%sData", what));
+
+ if (!header.Check(data.data(), data.size())) {
+ Fail(TEvHandshakeFail::HANDSHAKE_FAIL_TRANSIENT, "Extended header CRC error");
+ }
+
+ return data;
+ }
+
+ private:
+ void SendToProxy(THolder<IEventBase> ev) {
+ Y_VERIFY(PeerNodeId);
+ Send(GetActorSystem()->InterconnectProxy(PeerNodeId), ev.Release());
+ }
+
+ template <typename TEvent>
+ THolder<typename TEvent::THandle> WaitForSpecificEvent(TString state, TInstant deadline = TInstant::Max()) {
+ State = std::move(state);
+ return TActorCoroImpl::WaitForSpecificEvent<TEvent>(deadline);
+ }
+
+ template <typename T1, typename T2, typename... TEvents>
+ THolder<IEventHandle> WaitForSpecificEvent(TString state, TInstant deadline = TInstant::Max()) {
+ State = std::move(state);
+ return TActorCoroImpl::WaitForSpecificEvent<T1, T2, TEvents...>(deadline);
+ }
+
+ template <typename TEvent>
+ THolder<typename TEvent::THandle> AskProxy(THolder<IEventBase> ev, TString state) {
+ SendToProxy(std::move(ev));
+ return WaitForSpecificEvent<TEvent>(std::move(state));
+ }
+
+ template <typename T1, typename T2, typename... TOther>
+ THolder<IEventHandle> AskProxy(THolder<IEventBase> ev, TString state) {
+ SendToProxy(std::move(ev));
+ return WaitForSpecificEvent<T1, T2, TOther...>(std::move(state));
+ }
+
+ void Fail(TEvHandshakeFail::EnumHandshakeFail reason, TString explanation, bool network = false) {
+ TString msg = Sprintf("%s Peer# %s(%s) %s%s", HandshakeKind.data(), PeerHostName ? PeerHostName.data() : "<unknown>",
+ PeerAddr.size() ? PeerAddr.data() : "<unknown>", ResolveTimedOut ? "[resolve timeout] " : "",
+ explanation.data());
+
+ if (network) {
+ TInstant now = Now();
+ TInstant prevLog = LastLogNotice[PeerNodeId];
+ NActors::NLog::EPriority logPriority = NActors::NLog::PRI_DEBUG;
+ if (now - prevLog > MuteDuration) {
+ logPriority = NActors::NLog::PRI_NOTICE;
+ LastLogNotice[PeerNodeId] = now;
+ }
+ LOG_LOG_NET_X(logPriority, PeerNodeId, "network-related error occurred on handshake: %s", msg.data());
+ } else {
+ // calculate log severity based on failure type; permanent failures lead to error log messages
+ auto severity = reason == TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT
+ ? NActors::NLog::PRI_NOTICE
+ : NActors::NLog::PRI_INFO;
+
+ LOG_LOG_IC_X(NActorsServices::INTERCONNECT, "ICH03", severity, "handshake failed, explanation# %s", msg.data());
+ }
+
+ if (PeerNodeId) {
+ SendToProxy(MakeHolder<TEvHandshakeFail>(reason, std::move(msg)));
+ }
+
+ throw TExHandshakeFailed() << explanation;
+ }
+
+ private:
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // COMMUNICATION BLOCK
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ void Connect(bool updatePeerAddr) {
+ // issue request to a nameservice to resolve peer node address
+ Send(Common->NameserviceId, new TEvInterconnect::TEvResolveNode(PeerNodeId, Deadline));
+
+ // wait for the result
+ auto ev = WaitForSpecificEvent<TEvResolveError, TEvLocalNodeInfo, TEvInterconnect::TEvNodeAddress>("ResolveNode",
+ Now() + ResolveTimeout);
+
+ // extract address from the result
+ NInterconnect::TAddress address;
+ if (!ev) {
+ ResolveTimedOut = true;
+ if (auto peerNodeInfo = GetPeerNodeInfo(); peerNodeInfo && peerNodeInfo->Address) {
+ address = {peerNodeInfo->Address, peerNodeInfo->Port};
+ } else {
+ Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, "DNS resolve timed out and no static address defined", true);
+ }
+ } else if (auto *p = ev->CastAsLocal<TEvLocalNodeInfo>()) {
+ if (!p->Address) {
+ Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, "DNS resolve error: no address returned", true);
+ }
+ address = {*p->Address};
+ } else if (auto *p = ev->CastAsLocal<TEvInterconnect::TEvNodeAddress>()) {
+ const auto& r = p->Record;
+ if (!r.HasAddress() || !r.HasPort()) {
+ Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, "DNS resolve error: no address returned", true);
+ }
+ address = {r.GetAddress(), static_cast<ui16>(r.GetPort())};
+ } else {
+ Y_VERIFY(ev->GetTypeRewrite() == ui32(ENetwork::ResolveError));
+ Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, "DNS resolve error: " + ev->Get<TEvResolveError>()->Explain, true);
+ }
+
+ // create the socket with matching address family
+ Socket = NInterconnect::TStreamSocket::Make(address.GetFamily());
+ if (*Socket == -1) {
+ Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, "System error: failed to create socket");
+ }
+
+ // extract peer address
+ if (updatePeerAddr) {
+ PeerAddr = address.ToString();
+ }
+
+ // set up socket parameters
+ SetupSocket();
+
+ // start connecting
+ switch (int err = -Socket->Connect(address)) {
+ case 0: // successful connection
+ break;
+
+ case EINPROGRESS: // connection in progress
+ WaitPoller(false, true, "WaitConnect");
+ err = Socket->GetConnectStatus();
+ if (err) {
+ Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, Sprintf("Connection failed: %s", strerror(err)), true);
+ }
+ break;
+
+ default: // immediate connection error -- report it instead of silently proceeding
+ Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, Sprintf("Connection failed: %s", strerror(err)), true);
+ break;
+ }
+
+ auto it = LastLogNotice.find(PeerNodeId);
+ NActors::NLog::EPriority logPriority = NActors::NLog::PRI_DEBUG;
+ if (it != LastLogNotice.end()) {
+ LastLogNotice.erase(it);
+ logPriority = NActors::NLog::PRI_NOTICE;
+ }
+ LOG_LOG_IC_X(NActorsServices::INTERCONNECT, "ICH05", logPriority, "connected to peer");
+ }
+
+ void SetupSocket() {
+ // switch to nonblocking mode
+ try {
+ SetNonBlock(*Socket);
+ SetNoDelay(*Socket, true);
+ } catch (...) {
+ Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, "System error: can't up nonblocking mode for socket");
+ }
+
+ // setup send buffer size
+ Socket->SetSendBufferSize(Common->Settings.GetSendBufferSize());
+
+ // register in poller
+ RegisterInPoller();
+ }
+
+ void RegisterInPoller() {
+ const bool success = Send(MakePollerActorId(), new TEvPollerRegister(Socket, SelfActorId, SelfActorId));
+ Y_VERIFY(success);
+ auto result = WaitForSpecificEvent<TEvPollerRegisterResult>("RegisterPoller");
+ PollerToken = std::move(result->Get()->PollerToken);
+ Y_VERIFY(PollerToken);
+ Y_VERIFY(PollerToken->RefCount() == 1); // ensure exclusive ownership
+ }
+
+ void SendInitialPacket() {
+ TInitialPacket packet(SelfVirtualId, PeerVirtualId, NextPacketToPeer, INTERCONNECT_PROTOCOL_VERSION);
+ SendData(&packet, sizeof(packet), "SendInitialPacket");
+ }
+
+ void WaitPoller(bool read, bool write, TString state) {
+ PollerToken->Request(read, write);
+ WaitForSpecificEvent<TEvPollerReady>(std::move(state));
+ }
+
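+ // Generic nonblocking I/O driver behind SendData/ReceiveData: invokes the given
+ // TStreamSocket member (Send or Recv) until `len` bytes are processed, parking the
+ // coroutine on a poller event on EAGAIN/EWOULDBLOCK, retrying on EINTR, and failing the
+ // handshake on connection close or any other socket error.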
+ template <typename TDataPtr, typename TSendRecvFunc>
+ void Process(TDataPtr buffer, size_t len, TSendRecvFunc&& sendRecv, bool read, bool write, TString state) {
+ Y_VERIFY(Socket);
+ NInterconnect::TStreamSocket* sock = Socket.Get();
+ ssize_t (NInterconnect::TStreamSocket::*pfn)(TDataPtr, size_t, TString*) const = sendRecv;
+ size_t processed = 0;
+
+ auto error = [&](TString msg) {
+ Fail(TEvHandshakeFail::HANDSHAKE_FAIL_TRANSIENT, Sprintf("Socket error# %s state# %s processed# %zu remain# %zu",
+ msg.data(), state.data(), processed, len), true);
+ };
+
+ while (len) {
+ TString err;
+ ssize_t nbytes = (sock->*pfn)(buffer, len, &err);
+ if (nbytes > 0) {
+ buffer = (char*)buffer + nbytes;
+ len -= nbytes;
+ processed += nbytes;
+ } else if (-nbytes == EAGAIN || -nbytes == EWOULDBLOCK) {
+ WaitPoller(read, write, state);
+ } else if (!nbytes) {
+ error("connection unexpectedly closed");
+ } else if (-nbytes != EINTR) {
+ error(err ? err : TString(strerror(-nbytes)));
+ }
+ }
+ }
+
+ void SendData(const void* buffer, size_t len, TString state) {
+ Process(buffer, len, &NInterconnect::TStreamSocket::Send, false, true, std::move(state));
+ }
+
+ void ReceiveData(void* buffer, size_t len, TString state) {
+ Process(buffer, len, &NInterconnect::TStreamSocket::Recv, true, false, std::move(state));
+ }
+
+ THolder<TEvInterconnect::TNodeInfo> GetPeerNodeInfo() {
+ Y_VERIFY(PeerNodeId);
+ Send(Common->NameserviceId, new TEvInterconnect::TEvGetNode(PeerNodeId, Deadline));
+ auto response = WaitForSpecificEvent<TEvInterconnect::TEvNodeInfo>("GetPeerNodeInfo");
+ return std::move(response->Get()->Node);
+ }
+
+ template <typename T>
+ static THolder<TProgramInfo> GetProgramInfo(const T& proto) {
+ auto programInfo = MakeHolder<TProgramInfo>();
+ programInfo->PID = proto.GetProgramPID();
+ programInfo->StartTime = proto.GetProgramStartTime();
+ programInfo->Serial = proto.GetSerial();
+ return programInfo;
+ }
+ };
+
+ IActor* CreateOutgoingHandshakeActor(TInterconnectProxyCommon::TPtr common, const TActorId& self,
+ const TActorId& peer, ui32 nodeId, ui64 nextPacket, TString peerHostName,
+ TSessionParams params) {
+ return new TActorCoro(MakeHolder<THandshakeActor>(std::move(common), self, peer, nodeId, nextPacket,
+ std::move(peerHostName), std::move(params)));
+ }
+
+ IActor* CreateIncomingHandshakeActor(TInterconnectProxyCommon::TPtr common, TSocketPtr socket) {
+ return new TActorCoro(MakeHolder<THandshakeActor>(std::move(common), std::move(socket)));
+ }
+
+}
diff --git a/library/cpp/actors/interconnect/interconnect_handshake.h b/library/cpp/actors/interconnect/interconnect_handshake.h
new file mode 100644
index 0000000000..b3c0db6c5d
--- /dev/null
+++ b/library/cpp/actors/interconnect/interconnect_handshake.h
@@ -0,0 +1,24 @@
+#pragma once
+
+#include <library/cpp/actors/core/hfunc.h>
+#include <library/cpp/actors/core/event_pb.h>
+#include <library/cpp/actors/core/events.h>
+
+#include "interconnect_common.h"
+#include "interconnect_impl.h"
+#include "poller_tcp.h"
+#include "events_local.h"
+
+namespace NActors {
+ static constexpr TDuration DEFAULT_HANDSHAKE_TIMEOUT = TDuration::Seconds(1);
+ static constexpr ui64 INTERCONNECT_PROTOCOL_VERSION = 2;
+
+ using TSocketPtr = TIntrusivePtr<NInterconnect::TStreamSocket>;
+
+ IActor* CreateOutgoingHandshakeActor(TInterconnectProxyCommon::TPtr common, const TActorId& self,
+ const TActorId& peer, ui32 nodeId, ui64 nextPacket, TString peerHostName,
+ TSessionParams params);
+
+ IActor* CreateIncomingHandshakeActor(TInterconnectProxyCommon::TPtr common, TSocketPtr socket);
+
+}
diff --git a/library/cpp/actors/interconnect/interconnect_impl.h b/library/cpp/actors/interconnect/interconnect_impl.h
new file mode 100644
index 0000000000..ee29e4d397
--- /dev/null
+++ b/library/cpp/actors/interconnect/interconnect_impl.h
@@ -0,0 +1,45 @@
+#pragma once
+
+#include "interconnect.h"
+#include <library/cpp/actors/protos/interconnect.pb.h>
+#include <library/cpp/actors/core/event_pb.h>
+#include <library/cpp/actors/helpers/mon_histogram_helper.h>
+#include <library/cpp/monlib/dynamic_counters/counters.h>
+
+namespace NActors {
+ // resolve node info
+ struct TEvInterconnect::TEvResolveNode: public TEventPB<TEvInterconnect::TEvResolveNode, NActorsInterconnect::TEvResolveNode, TEvInterconnect::EvResolveNode> {
+ TEvResolveNode() {
+ }
+
+ TEvResolveNode(ui32 nodeId, TInstant deadline = TInstant::Max()) {
+ Record.SetNodeId(nodeId);
+ if (deadline != TInstant::Max()) {
+ Record.SetDeadline(deadline.GetValue());
+ }
+ }
+ };
+
+ // node info
+ struct TEvInterconnect::TEvNodeAddress: public TEventPB<TEvInterconnect::TEvNodeAddress, NActorsInterconnect::TEvNodeInfo, TEvInterconnect::EvNodeAddress> {
+ TEvNodeAddress() {
+ }
+
+ TEvNodeAddress(ui32 nodeId) {
+ Record.SetNodeId(nodeId);
+ }
+ };
+
+ // register node
+ struct TEvInterconnect::TEvRegisterNode: public TEventBase<TEvInterconnect::TEvRegisterNode, TEvInterconnect::EvRegisterNode> {
+ };
+
+ // reply on register node
+ struct TEvInterconnect::TEvRegisterNodeResult: public TEventBase<TEvInterconnect::TEvRegisterNodeResult, TEvInterconnect::EvRegisterNodeResult> {
+ };
+
+ // disconnect
+ struct TEvInterconnect::TEvDisconnect: public TEventLocal<TEvInterconnect::TEvDisconnect, TEvInterconnect::EvDisconnect> {
+ };
+
+}
diff --git a/library/cpp/actors/interconnect/interconnect_mon.cpp b/library/cpp/actors/interconnect/interconnect_mon.cpp
new file mode 100644
index 0000000000..cf924ccbf9
--- /dev/null
+++ b/library/cpp/actors/interconnect/interconnect_mon.cpp
@@ -0,0 +1,276 @@
+#include "interconnect_mon.h"
+#include "interconnect_tcp_proxy.h"
+
+#include <library/cpp/json/json_value.h>
+#include <library/cpp/json/json_writer.h>
+#include <library/cpp/monlib/service/pages/templates.h>
+
+#include <openssl/ssl.h>
+#include <openssl/pem.h>
+
+namespace NInterconnect {
+
+ using namespace NActors;
+
+ class TInterconnectMonActor : public TActor<TInterconnectMonActor> {
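+ // Per-request worker: for every HTTP query the mon actor spawns a TQueryProcessor, which
+ // asks the nameservice for the node list, fans TEvQueryStats out to every interconnect
+ // proxy (with delivery tracking and a 5 second overall timeout), and renders the collected
+ // replies as an HTML table or JSON before dying.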
+ class TQueryProcessor : public TActorBootstrapped<TQueryProcessor> {
+ const TActorId Sender;
+ const bool Json;
+ TMap<ui32, TInterconnectProxyTCP::TProxyStats> Stats;
+ ui32 PendingReplies = 0;
+
+ public:
+ static constexpr IActor::EActorActivity ActorActivityType() {
+ return INTERCONNECT_MONACTOR;
+ }
+
+ TQueryProcessor(const TActorId& sender, bool json)
+ : Sender(sender)
+ , Json(json)
+ {}
+
+ void Bootstrap(const TActorContext& ctx) {
+ Become(&TThis::StateFunc, ctx, TDuration::Seconds(5), new TEvents::TEvWakeup);
+ Send(GetNameserviceActorId(), new TEvInterconnect::TEvListNodes);
+ }
+
+ void Handle(TEvInterconnect::TEvNodesInfo::TPtr ev, const TActorContext& ctx) {
+ TActorSystem* const as = ctx.ExecutorThread.ActorSystem;
+ for (const auto& node : ev->Get()->Nodes) {
+ Send(as->InterconnectProxy(node.NodeId), new TInterconnectProxyTCP::TEvQueryStats, IEventHandle::FlagTrackDelivery);
+ ++PendingReplies;
+ }
+ GenerateResultWhenReady(ctx);
+ }
+
+ STRICT_STFUNC(StateFunc,
+ HFunc(TEvInterconnect::TEvNodesInfo, Handle)
+ HFunc(TInterconnectProxyTCP::TEvStats, Handle)
+ CFunc(TEvents::TSystem::Undelivered, HandleUndelivered)
+ CFunc(TEvents::TSystem::Wakeup, HandleWakeup)
+ )
+
+ void Handle(TInterconnectProxyTCP::TEvStats::TPtr& ev, const TActorContext& ctx) {
+ auto *msg = ev->Get();
+ Stats.emplace(msg->PeerNodeId, std::move(msg->ProxyStats));
+ --PendingReplies;
+ GenerateResultWhenReady(ctx);
+ }
+
+ void HandleUndelivered(const TActorContext& ctx) {
+ --PendingReplies;
+ GenerateResultWhenReady(ctx);
+ }
+
+ void HandleWakeup(const TActorContext& ctx) {
+ PendingReplies = 0;
+ GenerateResultWhenReady(ctx);
+ }
+
+ void GenerateResultWhenReady(const TActorContext& ctx) {
+ if (!PendingReplies) {
+ if (Json) {
+ ctx.Send(Sender, new NMon::TEvHttpInfoRes(GenerateJson(), 0, NMon::IEvHttpInfoRes::EContentType::Custom));
+ } else {
+ ctx.Send(Sender, new NMon::TEvHttpInfoRes(GenerateHtml()));
+ }
+ Die(ctx);
+ }
+ }
+
+ TString GenerateHtml() {
+ TStringStream str;
+ HTML(str) {
+ TABLE_CLASS("table-sortable table") {
+ TABLEHEAD() {
+ TABLER() {
+ TABLEH() { str << "Peer node id"; }
+ TABLEH() { str << "State"; }
+ TABLEH() { str << "Ping"; }
+ TABLEH() { str << "Clock skew"; }
+ TABLEH() { str << "Scope id"; }
+ TABLEH() { str << "Encryption"; }
+ TABLEH() { str << "LastSessionDieTime"; }
+ TABLEH() { str << "TotalOutputQueueSize"; }
+ TABLEH() { str << "Connected"; }
+ TABLEH() { str << "Host"; }
+ TABLEH() { str << "Port"; }
+ TABLEH() { str << "LastErrorTimestamp"; }
+ TABLEH() { str << "LastErrorKind"; }
+ TABLEH() { str << "LastErrorExplanation"; }
+ }
+ }
+ TABLEBODY() {
+ for (const auto& kv : Stats) {
+ TABLER() {
+ TABLED() { str << "<a href='" << kv.second.Path << "'>" << kv.first << "</a>"; }
+ TABLED() { str << kv.second.State; }
+ TABLED() {
+ if (kv.second.Ping != TDuration::Zero()) {
+ str << kv.second.Ping;
+ }
+ }
+ TABLED() {
+ if (kv.second.ClockSkew < 0) {
+ str << "-" << TDuration::MicroSeconds(-kv.second.ClockSkew);
+ } else {
+ str << "+" << TDuration::MicroSeconds(kv.second.ClockSkew);
+ }
+ }
+ TABLED() { str << ScopeIdToString(kv.second.PeerScopeId); }
+ TABLED() {
+ const char *color = kv.second.Encryption != "none" ? "green" : "red";
+ str << "<font color='" << color << "'>" << kv.second.Encryption << "</font>";
+ }
+ TABLED() {
+ if (kv.second.LastSessionDieTime != TInstant::Zero()) {
+ str << kv.second.LastSessionDieTime;
+ }
+ }
+ TABLED() { str << kv.second.TotalOutputQueueSize; }
+ TABLED() { str << (kv.second.Connected ? "yes" : "<strong>no</strong>"); }
+ TABLED() { str << kv.second.Host; }
+ TABLED() { str << kv.second.Port; }
+ TABLED() {
+ str << "<strong>";
+ if (kv.second.LastErrorTimestamp != TInstant::Zero()) {
+ str << kv.second.LastErrorTimestamp;
+ }
+ str << "</strong>";
+ }
+ TABLED() { str << "<strong>" << kv.second.LastErrorKind << "</strong>"; }
+ TABLED() { str << "<strong>" << kv.second.LastErrorExplanation << "</strong>"; }
+ }
+ }
+ }
+ }
+ }
+ return str.Str();
+ }
+
+ TString GenerateJson() {
+ NJson::TJsonValue json;
+ for (const auto& [nodeId, info] : Stats) {
+ NJson::TJsonValue item;
+ item["NodeId"] = nodeId;
+
+ auto id = [](const auto& x) { return x; };
+ auto toString = [](const auto& x) { return x.ToString(); };
+
+#define JSON(NAME, FUN) item[#NAME] = FUN(info.NAME);
+ JSON(Path, id)
+ JSON(State, id)
+ JSON(PeerScopeId, ScopeIdToString)
+ JSON(LastSessionDieTime, toString)
+ JSON(TotalOutputQueueSize, id)
+ JSON(Connected, id)
+ JSON(Host, id)
+ JSON(Port, id)
+ JSON(LastErrorTimestamp, toString)
+ JSON(LastErrorKind, id)
+ JSON(LastErrorExplanation, id)
+ JSON(Ping, toString)
+ JSON(ClockSkew, id)
+ JSON(Encryption, id)
+#undef JSON
+
+ json[ToString(nodeId)] = item;
+ }
+ TStringStream str(NMonitoring::HTTPOKJSON);
+ NJson::WriteJson(&str, &json);
+ return str.Str();
+ }
+ };
+
+ private:
+ TIntrusivePtr<TInterconnectProxyCommon> Common;
+
+ public:
+ static constexpr IActor::EActorActivity ActorActivityType() {
+ return INTERCONNECT_MONACTOR;
+ }
+
+ TInterconnectMonActor(TIntrusivePtr<TInterconnectProxyCommon> common)
+ : TActor(&TThis::StateFunc)
+ , Common(std::move(common))
+ {}
+
+ STRICT_STFUNC(StateFunc,
+ HFunc(NMon::TEvHttpInfo, Handle)
+ )
+
+ void Handle(NMon::TEvHttpInfo::TPtr& ev, const TActorContext& ctx) {
+ const auto& params = ev->Get()->Request.GetParams();
+ int certinfo = 0;
+ if (TryFromString(params.Get("certinfo"), certinfo) && certinfo) {
+ ctx.Send(ev->Sender, new NMon::TEvHttpInfoRes(GetCertInfoJson(), ev->Get()->SubRequestId,
+ NMon::TEvHttpInfoRes::Custom));
+ } else {
+ const bool json = params.Has("fmt") && params.Get("fmt") == "json";
+ ctx.Register(new TQueryProcessor(ev->Sender, json));
+ }
+ }
+
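+ // Certificate introspection for the "certinfo" query parameter: parses the PEM certificate
+ // from the settings with OpenSSL and reports its subject, common name and validity window
+ // as JSON; any OpenSSL failure is reported in an "Error" field instead of propagating.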
+ TString GetCertInfoJson() const {
+ NJson::TJsonValue json(NJson::JSON_MAP);
+ if (const TString cert = Common ? Common->Settings.Certificate : TString()) {
+ struct TEx : yexception {};
+ try {
+ const auto& cert = Common->Settings.Certificate;
+ std::unique_ptr<BIO, void(*)(BIO*)> bio(BIO_new_mem_buf(cert.data(), cert.size()), &BIO_vfree);
+ if (!bio) {
+ throw TEx() << "BIO_new_mem_buf failed";
+ }
+ std::unique_ptr<X509, void(*)(X509*)> x509(PEM_read_bio_X509(bio.get(), nullptr, nullptr, nullptr),
+ &X509_free);
+ if (!x509) {
+ throw TEx() << "PEM_read_bio_X509 failed";
+ }
+ X509_NAME *name = X509_get_subject_name(x509.get());
+ if (!name) {
+ throw TEx() << "X509_get_subject_name failed";
+ }
+ char buffer[4096];
+ if (char *p = X509_NAME_oneline(name, buffer, sizeof(buffer))) {
+ json["Subject"] = p;
+ }
+ if (int loc = X509_NAME_get_index_by_NID(name, NID_commonName, -1); loc >= 0) {
+ if (X509_NAME_ENTRY *entry = X509_NAME_get_entry(name, loc)) {
+ if (ASN1_STRING *data = X509_NAME_ENTRY_get_data(entry)) {
+ unsigned char *cn;
+ if (const int len = ASN1_STRING_to_UTF8(&cn, data); len >= 0) {
+ json["CommonName"] = TString(reinterpret_cast<char*>(cn), len);
+ OPENSSL_free(cn);
+ }
+ }
+ }
+ }
+ auto time = [](const ASN1_TIME *t, const char *name) -> TString {
+ if (t) {
+ struct tm tm;
+ if (ASN1_TIME_to_tm(t, &tm)) {
+ return Strftime("%Y-%m-%dT%H:%M:%S%z", &tm);
+ } else {
+ throw TEx() << "ASN1_TIME_to_tm failed";
+ }
+ } else {
+ throw TEx() << name << " failed";
+ }
+ };
+ json["NotBefore"] = time(X509_get0_notBefore(x509.get()), "X509_get0_notBefore");
+ json["NotAfter"] = time(X509_get0_notAfter(x509.get()), "X509_get0_notAfter");
+ } catch (const TEx& ex) {
+ json["Error"] = ex.what();
+ }
+ }
+ TStringStream str(NMonitoring::HTTPOKJSON);
+ NJson::WriteJson(&str, &json);
+ return str.Str();
+ }
+ };
+
+ IActor *CreateInterconnectMonActor(TIntrusivePtr<TInterconnectProxyCommon> common) {
+ return new TInterconnectMonActor(std::move(common));
+ }
+
+} // NInterconnect
diff --git a/library/cpp/actors/interconnect/interconnect_mon.h b/library/cpp/actors/interconnect/interconnect_mon.h
new file mode 100644
index 0000000000..3fb26053fb
--- /dev/null
+++ b/library/cpp/actors/interconnect/interconnect_mon.h
@@ -0,0 +1,15 @@
+#pragma once
+
+#include <library/cpp/actors/core/actor.h>
+#include "interconnect_common.h"
+
+namespace NInterconnect {
+
+ NActors::IActor *CreateInterconnectMonActor(TIntrusivePtr<NActors::TInterconnectProxyCommon> common = nullptr);
+
+ static inline NActors::TActorId MakeInterconnectMonActorId(ui32 nodeId) {
+ char s[12] = {'I', 'C', 'O', 'v', 'e', 'r', 'v', 'i', 'e', 'w', 0, 0};
+ return NActors::TActorId(nodeId, TStringBuf(s, 12));
+ }
+
+} // NInterconnect
diff --git a/library/cpp/actors/interconnect/interconnect_nameserver_base.h b/library/cpp/actors/interconnect/interconnect_nameserver_base.h
new file mode 100644
index 0000000000..df614f6c2b
--- /dev/null
+++ b/library/cpp/actors/interconnect/interconnect_nameserver_base.h
@@ -0,0 +1,83 @@
+#include "interconnect.h"
+#include "interconnect_impl.h"
+#include "interconnect_address.h"
+#include "events_local.h"
+
+#include <library/cpp/actors/core/hfunc.h>
+#include <library/cpp/actors/memory_log/memlog.h>
+
+namespace NActors {
+
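+ // CRTP base shared by the table-driven and dynamic nameservers: it implements the resolve,
+ // list-nodes and get-node handlers over a node table reference owned by the derived class,
+ // and delegates unknown node ids to TDerived::HandleMissedNodeId (the default below simply
+ // replies with an empty TEvLocalNodeInfo, i.e. a resolve failure).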
+ template<typename TDerived>
+ class TInterconnectNameserverBase : public TActor<TDerived> {
+ protected:
+ const TMap<ui32, TTableNameserverSetup::TNodeInfo>& NodeTable;
+
+ TInterconnectNameserverBase(void (TDerived::*func)(TAutoPtr<IEventHandle>& ev, const TActorContext& ctx)
+ , const TMap<ui32, TTableNameserverSetup::TNodeInfo>& nodeTable)
+ : TActor<TDerived>(func)
+ , NodeTable(nodeTable)
+ {
+ }
+ public:
+
+ void HandleMissedNodeId(TEvInterconnect::TEvResolveNode::TPtr& ev,
+ const TActorContext& ctx,
+ const TInstant&) {
+ auto reply = new TEvLocalNodeInfo;
+ reply->NodeId = ev->Get()->Record.GetNodeId();
+ ctx.Send(ev->Sender, reply);
+ }
+
+ void Handle(TEvInterconnect::TEvResolveNode::TPtr& ev,
+ const TActorContext& ctx) {
+ const TEvInterconnect::TEvResolveNode* request = ev->Get();
+ auto& record = request->Record;
+ const ui32 nodeId = record.GetNodeId();
+ const TInstant deadline = record.HasDeadline() ? TInstant::FromValue(record.GetDeadline()) : TInstant::Max();
+ auto it = NodeTable.find(nodeId);
+
+ if (it == NodeTable.end()) {
+ static_cast<TDerived*>(this)->HandleMissedNodeId(ev, ctx, deadline);
+ } else {
+ IActor::RegisterWithSameMailbox(
+ CreateResolveActor(nodeId, it->second, ev->Sender, this->SelfId(), deadline));
+ }
+ }
+
+ void Handle(TEvResolveAddress::TPtr& ev,
+ const TActorContext&) {
+ const TEvResolveAddress* request = ev->Get();
+
+ IActor::RegisterWithSameMailbox(
+ CreateResolveActor(request->Address, request->Port, ev->Sender, this->SelfId(), TInstant::Max()));
+ }
+
+ void Handle(TEvInterconnect::TEvListNodes::TPtr& ev,
+ const TActorContext& ctx) {
+ THolder<TEvInterconnect::TEvNodesInfo>
+ reply(new TEvInterconnect::TEvNodesInfo());
+ reply->Nodes.reserve(NodeTable.size());
+ for (const auto& pr : NodeTable) {
+ reply->Nodes.emplace_back(pr.first,
+ pr.second.Address, pr.second.Host, pr.second.ResolveHost,
+ pr.second.Port, pr.second.Location);
+ }
+ ctx.Send(ev->Sender, reply.Release());
+ }
+
+ void Handle(TEvInterconnect::TEvGetNode::TPtr& ev,
+ const TActorContext& ctx) {
+ ui32 nodeId = ev->Get()->NodeId;
+ THolder<TEvInterconnect::TEvNodeInfo>
+ reply(new TEvInterconnect::TEvNodeInfo(nodeId));
+ auto it = NodeTable.find(nodeId);
+ if (it != NodeTable.end()) {
+ reply->Node = MakeHolder<TEvInterconnect::TNodeInfo>(it->first, it->second.Address,
+ it->second.Host, it->second.ResolveHost,
+ it->second.Port, it->second.Location);
+ }
+ ctx.Send(ev->Sender, reply.Release());
+ }
+ };
+}
diff --git a/library/cpp/actors/interconnect/interconnect_nameserver_dynamic.cpp b/library/cpp/actors/interconnect/interconnect_nameserver_dynamic.cpp
new file mode 100644
index 0000000000..5e48401b14
--- /dev/null
+++ b/library/cpp/actors/interconnect/interconnect_nameserver_dynamic.cpp
@@ -0,0 +1,178 @@
+#include "interconnect.h"
+#include "interconnect_impl.h"
+#include "interconnect_address.h"
+#include "interconnect_nameserver_base.h"
+#include "events_local.h"
+#include "logging.h"
+
+#include <library/cpp/actors/core/hfunc.h>
+#include <library/cpp/actors/core/log.h>
+
+namespace NActors {
+
+ class TInterconnectDynamicNameserver
+ : public TInterconnectNameserverBase<TInterconnectDynamicNameserver>
+ , public TInterconnectLoggingBase
+ {
+ struct TPendingRequest {
+ TEvInterconnect::TEvResolveNode::TPtr Request;
+ TInstant Deadline;
+
+ TPendingRequest(TEvInterconnect::TEvResolveNode::TPtr request, const TInstant& deadline)
+ : Request(request), Deadline(deadline)
+ {
+ }
+ };
+
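+ // Resolve requests for node ids not present in the table are parked here for up to
+ // PendingPeriod (bounded by the request's own deadline) in the hope that a subsequent
+ // TEvNodesInfo update registers the node; a 200 ms periodic wakeup discards expired entries.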
+ TMap<ui32, TTableNameserverSetup::TNodeInfo> NodeTable;
+ TVector<TPendingRequest> PendingRequests;
+ TDuration PendingPeriod;
+
+ void PrintInfo() {
+ TString logMsg = TStringBuilder() << "Table size: " << NodeTable.size();
+ for (const auto& [nodeId, node] : NodeTable) {
+ TString str = TStringBuilder() << "\n > Node " << nodeId << " `" << node.Address << "`:" << node.Port << ", host: " << node.Host << ", resolveHost: " << node.ResolveHost;
+ logMsg += str;
+ }
+ LOG_DEBUG_IC("ICN01", "%s", logMsg.c_str());
+ }
+
+ bool IsNodeUpdated(const ui32 nodeId, const TString& address, const ui32 port) {
+ bool printInfo = false;
+ auto it = NodeTable.find(nodeId);
+ if (it == NodeTable.end()) {
+ LOG_DEBUG_IC("ICN02", "New node %u `%s`: %u",
+ nodeId, address.c_str(), port);
+ printInfo = true;
+ } else if (it->second.Address != address || it->second.Port != port) {
+ LOG_DEBUG_IC("ICN03", "Updated node %u `%s`: %u (from `%s`: %u)",
+ nodeId, address.c_str(), port, it->second.Address.c_str(), it->second.Port);
+ printInfo = true;
+ Send(TActivationContext::InterconnectProxy(nodeId), new TEvInterconnect::TEvDisconnect);
+ }
+ return printInfo;
+ }
+
+ void DiscardTimedOutRequests(const TActorContext& ctx, ui32 compactionCount = 0) {
+
+ auto now = Now();
+
+ for (auto& pending : PendingRequests) {
+ if (pending.Deadline <= now) { // deadline has passed -- fail the request with an empty reply
+ LOG_ERROR_IC("ICN06", "Unknown nodeId: %u", pending.Request->Get()->Record.GetNodeId());
+ auto reply = new TEvLocalNodeInfo;
+ reply->NodeId = pending.Request->Get()->Record.GetNodeId();
+ ctx.Send(pending.Request->Sender, reply);
+ pending.Request.Reset();
+ compactionCount++;
+ }
+ }
+
+ if (compactionCount) {
+ TVector<TPendingRequest> requests;
+ if (compactionCount < PendingRequests.size()) { // sanity check
+ requests.reserve(PendingRequests.size() - compactionCount);
+ }
+ for (auto& pending : PendingRequests) {
+ if (pending.Request) {
+ requests.emplace_back(pending.Request, pending.Deadline);
+ }
+ }
+ PendingRequests.swap(requests);
+ }
+ }
+
+ void SchedulePeriodic() {
+ Schedule(TDuration::MilliSeconds(200), new TEvents::TEvWakeup());
+ }
+
+ public:
+ static constexpr EActivityType ActorActivityType() {
+ return NAMESERVICE;
+ }
+
+ TInterconnectDynamicNameserver(const TIntrusivePtr<TTableNameserverSetup>& setup, const TDuration& pendingPeriod, ui32 /*resolvePoolId*/ )
+ : TInterconnectNameserverBase<TInterconnectDynamicNameserver>(&TInterconnectDynamicNameserver::StateFunc, NodeTable)
+ , NodeTable(setup->StaticNodeTable)
+ , PendingPeriod(pendingPeriod)
+ {
+ Y_VERIFY(setup->IsEntriesUnique());
+ }
+
+ STFUNC(StateFunc) {
+ try {
+ switch (ev->GetTypeRewrite()) {
+ HFunc(TEvInterconnect::TEvResolveNode, Handle);
+ HFunc(TEvResolveAddress, Handle);
+ HFunc(TEvInterconnect::TEvListNodes, Handle);
+ HFunc(TEvInterconnect::TEvGetNode, Handle);
+ HFunc(TEvInterconnect::TEvNodesInfo, HandleUpdate);
+ CFunc(TEvents::TEvWakeup::EventType, HandlePeriodic);
+ }
+ } catch (...) {
+ LOG_ERROR_IC("ICN09", "%s", CurrentExceptionMessage().c_str());
+ }
+ }
+
+ void HandleMissedNodeId(TEvInterconnect::TEvResolveNode::TPtr& ev,
+ const TActorContext& ctx,
+ const TInstant& deadline) {
+ if (PendingPeriod) {
+ if (PendingRequests.size() == 0) {
+ SchedulePeriodic();
+ }
+ PendingRequests.emplace_back(std::move(ev), Min(deadline, Now() + PendingPeriod));
+ } else {
+ LOG_ERROR_IC("ICN07", "Unknown nodeId: %u", ev->Get()->Record.GetNodeId());
+ TInterconnectNameserverBase::HandleMissedNodeId(ev, ctx, deadline);
+ }
+ }
+
+ void HandleUpdate(TEvInterconnect::TEvNodesInfo::TPtr& ev,
+ const TActorContext& ctx) {
+
+ auto request = ev->Get();
+ LOG_DEBUG_IC("ICN04", "Update TEvNodesInfo with sz: %lu ", request->Nodes.size());
+
+ bool printInfo = false;
+ ui32 compactionCount = 0;
+
+ for (const auto& node : request->Nodes) {
+ printInfo |= IsNodeUpdated(node.NodeId, node.Address, node.Port);
+
+ NodeTable[node.NodeId] = TTableNameserverSetup::TNodeInfo(
+ node.Address, node.Host, node.ResolveHost, node.Port, node.Location);
+
+ for (auto& pending : PendingRequests) {
+ if (pending.Request->Get()->Record.GetNodeId() == node.NodeId) {
+ LOG_DEBUG_IC("ICN05", "Pending nodeId: %u discovered", node.NodeId);
+ RegisterWithSameMailbox(
+ CreateResolveActor(node.NodeId, NodeTable[node.NodeId], pending.Request->Sender, SelfId(), pending.Deadline));
+ pending.Request.Reset();
+ compactionCount++;
+ }
+ }
+ }
+
+ if (printInfo) {
+ PrintInfo();
+ }
+
+ DiscardTimedOutRequests(ctx, compactionCount);
+ }
+
+ void HandlePeriodic(const TActorContext& ctx) {
+ DiscardTimedOutRequests(ctx, 0);
+ if (PendingRequests.size()) {
+ SchedulePeriodic();
+ }
+ }
+ };
+
+ IActor* CreateDynamicNameserver(const TIntrusivePtr<TTableNameserverSetup>& setup,
+ const TDuration& pendingPeriod,
+ ui32 poolId) {
+ return new TInterconnectDynamicNameserver(setup, pendingPeriod, poolId);
+ }
+
+}
diff --git a/library/cpp/actors/interconnect/interconnect_nameserver_table.cpp b/library/cpp/actors/interconnect/interconnect_nameserver_table.cpp
new file mode 100644
index 0000000000..43419bf70d
--- /dev/null
+++ b/library/cpp/actors/interconnect/interconnect_nameserver_table.cpp
@@ -0,0 +1,86 @@
+#include "interconnect.h"
+#include "interconnect_impl.h"
+#include "interconnect_address.h"
+#include "interconnect_nameserver_base.h"
+#include "events_local.h"
+
+#include <library/cpp/actors/core/hfunc.h>
+#include <library/cpp/actors/memory_log/memlog.h>
+
+namespace NActors {
+
+ class TInterconnectNameserverTable: public TInterconnectNameserverBase<TInterconnectNameserverTable> {
+ TIntrusivePtr<TTableNameserverSetup> Config;
+
+ public:
+ static constexpr EActivityType ActorActivityType() {
+ return NAMESERVICE;
+ }
+
+ TInterconnectNameserverTable(const TIntrusivePtr<TTableNameserverSetup>& setup, ui32 /*resolvePoolId*/)
+ : TInterconnectNameserverBase<TInterconnectNameserverTable>(&TInterconnectNameserverTable::StateFunc, setup->StaticNodeTable)
+ , Config(setup)
+ {
+ Y_VERIFY(Config->IsEntriesUnique());
+ }
+
+ STFUNC(StateFunc) {
+ try {
+ switch (ev->GetTypeRewrite()) {
+ HFunc(TEvInterconnect::TEvResolveNode, Handle);
+ HFunc(TEvResolveAddress, Handle);
+ HFunc(TEvInterconnect::TEvListNodes, Handle);
+ HFunc(TEvInterconnect::TEvGetNode, Handle);
+ }
+ } catch (...) {
+ // on error - do nothing
+ }
+ }
+ };
+
+ IActor* CreateNameserverTable(const TIntrusivePtr<TTableNameserverSetup>& setup, ui32 poolId) {
+ return new TInterconnectNameserverTable(setup, poolId);
+ }
+
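+ // Uniqueness check for the static node table: sorts the entries by (port, address) and by
+ // (port, resolve host) and scans adjacent pairs, so no two nodes may share the same
+ // non-empty address:port pair or the same resolve-host:port pair.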
+ bool TTableNameserverSetup::IsEntriesUnique() const {
+ TVector<const TNodeInfo*> infos;
+ infos.reserve(StaticNodeTable.size());
+ for (const auto& x : StaticNodeTable)
+ infos.push_back(&x.second);
+
+ auto CompareAddressLambda =
+ [](const TNodeInfo* left, const TNodeInfo* right) {
+ return left->Port == right->Port ? left->Address < right->Address : left->Port < right->Port;
+ };
+
+ Sort(infos, CompareAddressLambda);
+
+ for (ui32 idx = 1, end = StaticNodeTable.size(); idx < end; ++idx) {
+ const TNodeInfo* left = infos[idx - 1];
+ const TNodeInfo* right = infos[idx];
+ if (left->Address && left->Address == right->Address && left->Port == right->Port)
+ return false;
+ }
+
+ auto CompareHostLambda =
+ [](const TNodeInfo* left, const TNodeInfo* right) {
+ return left->Port == right->Port ? left->ResolveHost < right->ResolveHost : left->Port < right->Port;
+ };
+
+ Sort(infos, CompareHostLambda);
+
+ for (ui32 idx = 1, end = StaticNodeTable.size(); idx < end; ++idx) {
+ const TNodeInfo* left = infos[idx - 1];
+ const TNodeInfo* right = infos[idx];
+ if (left->ResolveHost == right->ResolveHost && left->Port == right->Port)
+ return false;
+ }
+
+ return true;
+ }
+
+ TActorId GetNameserviceActorId() {
+ return TActorId(0, "namesvc");
+ }
+
+}
diff --git a/library/cpp/actors/interconnect/interconnect_proxy_wrapper.cpp b/library/cpp/actors/interconnect/interconnect_proxy_wrapper.cpp
new file mode 100644
index 0000000000..1c44b4c59b
--- /dev/null
+++ b/library/cpp/actors/interconnect/interconnect_proxy_wrapper.cpp
@@ -0,0 +1,47 @@
+#include "interconnect_proxy_wrapper.h"
+#include "interconnect_tcp_proxy.h"
+#include <library/cpp/actors/interconnect/mock/ic_mock.h>
+
+namespace NActors {
+
+ class TInterconnectProxyWrapper : public IActor {
+ TIntrusivePtr<TInterconnectProxyCommon> Common;
+ const ui32 NodeId;
+ TInterconnectMock *Mock;
+ IActor *Proxy = nullptr;
+
+ public:
+ TInterconnectProxyWrapper(TIntrusivePtr<TInterconnectProxyCommon> common, ui32 nodeId, TInterconnectMock *mock)
+ : IActor(static_cast<TReceiveFunc>(&TInterconnectProxyWrapper::StateFunc), INTERCONNECT_PROXY_WRAPPER)
+ , Common(std::move(common))
+ , NodeId(nodeId)
+ , Mock(mock)
+ {}
+
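+        // The wrapper creates the real proxy lazily on the first event: a Poison arriving
+        // before that simply destroys the wrapper, while any other event instantiates either
+        // a mock proxy (in tests) or the real TCP proxy and is then forwarded to it
+        // synchronously via InvokeOtherActor.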
+ STFUNC(StateFunc) {
+ if (ev->GetTypeRewrite() == TEvents::TSystem::Poison && !Proxy) {
+ PassAway();
+ } else {
+ if (!Proxy) {
+ IActor *actor = Mock
+ ? Mock->CreateProxyMock(TActivationContext::ActorSystem()->NodeId, NodeId, Common)
+ : new TInterconnectProxyTCP(NodeId, Common, &Proxy);
+ RegisterWithSameMailbox(actor);
+ if (Mock) {
+ Proxy = actor;
+ }
+ Y_VERIFY(Proxy);
+ }
+ InvokeOtherActor(*Proxy, &IActor::Receive, ev, ctx);
+ }
+ }
+ };
+
+ TProxyWrapperFactory CreateProxyWrapperFactory(TIntrusivePtr<TInterconnectProxyCommon> common, ui32 poolId,
+ TInterconnectMock *mock) {
+ return [=](TActorSystem *as, ui32 nodeId) -> TActorId {
+ return as->Register(new TInterconnectProxyWrapper(common, nodeId, mock), TMailboxType::HTSwap, poolId);
+ };
+ }
+
+} // NActors
diff --git a/library/cpp/actors/interconnect/interconnect_proxy_wrapper.h b/library/cpp/actors/interconnect/interconnect_proxy_wrapper.h
new file mode 100644
index 0000000000..e5942351a7
--- /dev/null
+++ b/library/cpp/actors/interconnect/interconnect_proxy_wrapper.h
@@ -0,0 +1,12 @@
+#pragma once
+
+#include "interconnect_common.h"
+
+#include <library/cpp/actors/core/actorsystem.h>
+
+namespace NActors {
+
+ TProxyWrapperFactory CreateProxyWrapperFactory(TIntrusivePtr<TInterconnectProxyCommon> common, ui32 poolId,
+ class TInterconnectMock *mock = nullptr);
+
+}
diff --git a/library/cpp/actors/interconnect/interconnect_resolve.cpp b/library/cpp/actors/interconnect/interconnect_resolve.cpp
new file mode 100644
index 0000000000..14296194df
--- /dev/null
+++ b/library/cpp/actors/interconnect/interconnect_resolve.cpp
@@ -0,0 +1,174 @@
+#include "interconnect.h"
+#include "interconnect_address.h"
+#include "events_local.h"
+
+#include <library/cpp/actors/core/actor_bootstrapped.h>
+#include <library/cpp/actors/core/hfunc.h>
+#include <library/cpp/actors/dnsresolver/dnsresolver.h>
+
+namespace NActors {
+
+ using namespace NActors::NDnsResolver;
+
+ class TInterconnectResolveActor : public TActorBootstrapped<TInterconnectResolveActor> {
+ public:
+ TInterconnectResolveActor(
+ const TString& host, ui16 port, ui32 nodeId, const TString& defaultAddress,
+ const TActorId& replyTo, const TActorId& replyFrom, TInstant deadline)
+ : Host(host)
+ , NodeId(nodeId)
+ , Port(port)
+ , DefaultAddress(defaultAddress)
+ , ReplyTo(replyTo)
+ , ReplyFrom(replyFrom)
+ , Deadline(deadline)
+ { }
+
+ TInterconnectResolveActor(
+ const TString& host, ui16 port,
+ const TActorId& replyTo, const TActorId& replyFrom, TInstant deadline)
+ : Host(host)
+ , Port(port)
+ , ReplyTo(replyTo)
+ , ReplyFrom(replyFrom)
+ , Deadline(deadline)
+ { }
+
+ static constexpr EActivityType ActorActivityType() {
+ return NAMESERVICE;
+ }
+
+ void Bootstrap() {
+ TMaybe<TString> errorText;
+ if (auto addr = ExtractDefaultAddr(errorText)) {
+ return SendAddrAndDie(std::move(addr));
+ }
+
+            if (errorText) {
+                return SendErrorAndDie(*errorText);
+            }
+
+ auto now = TActivationContext::Now();
+ if (Deadline < now) {
+ SendErrorAndDie("Deadline");
+ return;
+ }
+
+ Send(MakeDnsResolverActorId(),
+ new TEvDns::TEvGetAddr(Host, AF_UNSPEC),
+ IEventHandle::FlagTrackDelivery);
+
+ if (Deadline != TInstant::Max()) {
+ Schedule(Deadline, new TEvents::TEvWakeup);
+ }
+
+ Become(&TThis::StateWork);
+ }
+
+ STRICT_STFUNC(StateWork, {
+ sFunc(TEvents::TEvWakeup, HandleTimeout);
+ sFunc(TEvents::TEvUndelivered, HandleUndelivered);
+ hFunc(TEvDns::TEvGetAddrResult, Handle);
+ });
+
+ void HandleTimeout() {
+ SendErrorAndDie("Deadline");
+ }
+
+ void HandleUndelivered() {
+ SendErrorAndDie("Dns resolver is unavailable");
+ }
+
+ void Handle(TEvDns::TEvGetAddrResult::TPtr& ev) {
+ if (auto addr = ExtractAddr(ev->Get())) {
+ return SendAddrAndDie(std::move(addr));
+ }
+
+ SendErrorAndDie(ev->Get()->ErrorText);
+ }
+
+ void SendAddrAndDie(NAddr::IRemoteAddrPtr addr) {
+ if (NodeId) {
+ auto reply = new TEvLocalNodeInfo;
+ reply->NodeId = *NodeId;
+ reply->Address = std::move(addr);
+ TActivationContext::Send(new IEventHandle(ReplyTo, ReplyFrom, reply));
+ } else {
+ auto reply = new TEvAddressInfo;
+ reply->Address = std::move(addr);
+ TActivationContext::Send(new IEventHandle(ReplyTo, ReplyFrom, reply));
+ }
+ PassAway();
+ }
+
+ void SendErrorAndDie(const TString& errorText) {
+ auto *event = new TEvResolveError;
+ event->Explain = errorText;
+ TActivationContext::Send(new IEventHandle(ReplyTo, ReplyFrom, event));
+ PassAway();
+ }
+
+ NAddr::IRemoteAddrPtr ExtractAddr(TEvDns::TEvGetAddrResult* msg) {
+ if (msg->Status == 0) {
+ if (msg->IsV6()) {
+ struct sockaddr_in6 sin6;
+ Zero(sin6);
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_addr = msg->GetAddrV6();
+ sin6.sin6_port = HostToInet(Port);
+ return MakeHolder<NAddr::TIPv6Addr>(sin6);
+ }
+
+ if (msg->IsV4()) {
+ return MakeHolder<NAddr::TIPv4Addr>(TIpAddress(msg->GetAddrV4().s_addr, Port));
+ }
+
+ Y_FAIL("Unexpected result address family");
+ }
+
+ return nullptr;
+ }
+
+ NAddr::IRemoteAddrPtr ExtractDefaultAddr(TMaybe<TString>& errorText) {
+ if (DefaultAddress) {
+ NInterconnect::TAddress address(DefaultAddress.data(), Port);
+
+ switch (address.GetFamily()) {
+ case AF_INET:
+ return MakeHolder<NAddr::TIPv4Addr>(*(sockaddr_in*)address.SockAddr());
+ case AF_INET6:
+ return MakeHolder<NAddr::TIPv6Addr>(*(sockaddr_in6*)address.SockAddr());
+ default:
+ errorText = "Unsupported default address: " + DefaultAddress;
+ break;
+ }
+ }
+
+ return nullptr;
+ }
+
+ private:
+ const TString Host;
+ const std::optional<ui32> NodeId;
+ const ui16 Port;
+ const TString DefaultAddress;
+ const TActorId ReplyTo;
+ const TActorId ReplyFrom;
+ const TInstant Deadline;
+ };
+
+ IActor* CreateResolveActor(
+ const TString& host, ui16 port, ui32 nodeId, const TString& defaultAddress,
+ const TActorId& replyTo, const TActorId& replyFrom, TInstant deadline)
+ {
+ return new TInterconnectResolveActor(host, port, nodeId, defaultAddress, replyTo, replyFrom, deadline);
+ }
+
+ IActor* CreateResolveActor(
+ const TString& host, ui16 port,
+ const TActorId& replyTo, const TActorId& replyFrom, TInstant deadline)
+ {
+ return new TInterconnectResolveActor(host, port, replyTo, replyFrom, deadline);
+ }
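+
+    // Usage sketch (hypothetical call sites): the nodeId overload replies with TEvLocalNodeInfo,
+    // the host-only overload with TEvAddressInfo; failures arrive as TEvResolveError in both cases:
+    //   Register(CreateResolveActor(host, port, nodeId, defaultAddress, replyTo, SelfId(), deadline));
+    //   Register(CreateResolveActor(host, port, replyTo, SelfId(), deadline));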
+
+} // namespace NActors
diff --git a/library/cpp/actors/interconnect/interconnect_stream.cpp b/library/cpp/actors/interconnect/interconnect_stream.cpp
new file mode 100644
index 0000000000..158ebc9e1d
--- /dev/null
+++ b/library/cpp/actors/interconnect/interconnect_stream.cpp
@@ -0,0 +1,628 @@
+#include "interconnect_stream.h"
+#include "logging.h"
+#include <library/cpp/openssl/init/init.h>
+#include <util/network/socket.h>
+#include <openssl/ssl.h>
+#include <openssl/err.h>
+#include <openssl/pem.h>
+
+#if defined(_win_)
+#include <util/system/file.h>
+#define SOCK_NONBLOCK 0
+#elif defined(_darwin_)
+#define SOCK_NONBLOCK 0
+#else
+#include <sys/un.h>
+#include <sys/stat.h>
+#endif //_win_
+
+#if !defined(_win_)
+#include <sys/ioctl.h>
+#endif
+
+#include <cerrno>
+
+namespace NInterconnect {
+ namespace {
+ inline int
+ LastSocketError() {
+#if defined(_win_)
+ return WSAGetLastError();
+#else
+ return errno;
+#endif
+ }
+ }
+
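+    // Convention used throughout this file: wrapper methods return a non-negative value on
+    // success and a negated errno (e.g. -EAGAIN) on failure, so callers can branch on the
+    // sign and recover the error code without a separate errno read.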
+ TSocket::TSocket(SOCKET fd)
+ : Descriptor(fd)
+ {
+ }
+
+ TSocket::~TSocket() {
+ if (Descriptor == INVALID_SOCKET) {
+ return;
+ }
+
+ auto const result = ::closesocket(Descriptor);
+ if (result == 0)
+ return;
+ switch (LastSocketError()) {
+ case EBADF:
+ Y_FAIL("Close bad descriptor");
+ case EINTR:
+ break;
+ case EIO:
+ Y_FAIL("EIO");
+ default:
+                Y_FAIL("unexpected error from closesocket()");
+ }
+ }
+
+ int TSocket::GetDescriptor() {
+ return Descriptor;
+ }
+
+ int
+ TSocket::Bind(const TAddress& addr) const {
+ const auto ret = ::bind(Descriptor, addr.SockAddr(), addr.Size());
+ if (ret < 0)
+ return -LastSocketError();
+
+ return 0;
+ }
+
+ int
+ TSocket::Shutdown(int how) const {
+ const auto ret = ::shutdown(Descriptor, how);
+ if (ret < 0)
+ return -LastSocketError();
+
+ return 0;
+ }
+
+ int TSocket::GetConnectStatus() const {
+ int err = 0;
+ socklen_t len = sizeof(err);
+ if (getsockopt(Descriptor, SOL_SOCKET, SO_ERROR, reinterpret_cast<char*>(&err), &len) == -1) {
+ err = LastSocketError();
+ }
+ return err;
+ }
+
+ /////////////////////////////////////////////////////////////////
+
+ TIntrusivePtr<TStreamSocket> TStreamSocket::Make(int domain) {
+ const SOCKET res = ::socket(domain, SOCK_STREAM | SOCK_NONBLOCK, 0);
+ if (res == -1) {
+ const int err = LastSocketError();
+ Y_VERIFY(err != EMFILE && err != ENFILE);
+ }
+ return MakeIntrusive<TStreamSocket>(res);
+ }
+
+ TStreamSocket::TStreamSocket(SOCKET fd)
+ : TSocket(fd)
+ {
+ }
+
+ ssize_t
+ TStreamSocket::Send(const void* msg, size_t len, TString* /*err*/) const {
+ const auto ret = ::send(Descriptor, static_cast<const char*>(msg), int(len), 0);
+ if (ret < 0)
+ return -LastSocketError();
+
+ return ret;
+ }
+
+ ssize_t
+ TStreamSocket::Recv(void* buf, size_t len, TString* /*err*/) const {
+ const auto ret = ::recv(Descriptor, static_cast<char*>(buf), int(len), 0);
+ if (ret < 0)
+ return -LastSocketError();
+
+ return ret;
+ }
+
+ ssize_t
+ TStreamSocket::WriteV(const struct iovec* iov, int iovcnt) const {
+#ifndef _win_
+ const auto ret = ::writev(Descriptor, iov, iovcnt);
+ if (ret < 0)
+ return -LastSocketError();
+ return ret;
+#else
+ Y_FAIL("WriteV() unsupported on Windows");
+#endif
+ }
+
+ ssize_t
+ TStreamSocket::ReadV(const struct iovec* iov, int iovcnt) const {
+#ifndef _win_
+ const auto ret = ::readv(Descriptor, iov, iovcnt);
+ if (ret < 0)
+ return -LastSocketError();
+ return ret;
+#else
+ Y_FAIL("ReadV() unsupported on Windows");
+#endif
+ }
+
+ ssize_t TStreamSocket::GetUnsentQueueSize() const {
+ int num = -1;
+#ifndef _win_ // we have no means to determine output queue size on Windows
+ if (ioctl(Descriptor, TIOCOUTQ, &num) == -1) {
+ num = -1;
+ }
+#endif
+ return num;
+ }
+
+ int
+ TStreamSocket::Connect(const TAddress& addr) const {
+ const auto ret = ::connect(Descriptor, addr.SockAddr(), addr.Size());
+ if (ret < 0)
+ return -LastSocketError();
+
+ return ret;
+ }
+
+ int
+ TStreamSocket::Connect(const NAddr::IRemoteAddr* addr) const {
+ const auto ret = ::connect(Descriptor, addr->Addr(), addr->Len());
+ if (ret < 0)
+ return -LastSocketError();
+
+ return ret;
+ }
+
+ int
+ TStreamSocket::Listen(int backlog) const {
+ const auto ret = ::listen(Descriptor, backlog);
+ if (ret < 0)
+ return -LastSocketError();
+
+ return ret;
+ }
+
+ int
+ TStreamSocket::Accept(TAddress& acceptedAddr) const {
+ socklen_t acceptedSize = sizeof(::sockaddr_in6);
+ const auto ret = ::accept(Descriptor, acceptedAddr.SockAddr(), &acceptedSize);
+ if (ret == INVALID_SOCKET)
+ return -LastSocketError();
+
+ return ret;
+ }
+
+ void
+ TStreamSocket::SetSendBufferSize(i32 len) const {
+ (void)SetSockOpt(Descriptor, SOL_SOCKET, SO_SNDBUF, len);
+ }
+
+ ui32 TStreamSocket::GetSendBufferSize() const {
+ ui32 res = 0;
+ CheckedGetSockOpt(Descriptor, SOL_SOCKET, SO_SNDBUF, res, "SO_SNDBUF");
+ return res;
+ }
+
+ //////////////////////////////////////////////////////
+
+ TDatagramSocket::TPtr TDatagramSocket::Make(int domain) {
+ const SOCKET res = ::socket(domain, SOCK_DGRAM, 0);
+ if (res == -1) {
+ const int err = LastSocketError();
+ Y_VERIFY(err != EMFILE && err != ENFILE);
+ }
+ return std::make_shared<TDatagramSocket>(res);
+ }
+
+ TDatagramSocket::TDatagramSocket(SOCKET fd)
+ : TSocket(fd)
+ {
+ }
+
+ ssize_t
+ TDatagramSocket::SendTo(const void* msg, size_t len, const TAddress& toAddr) const {
+ const auto ret = ::sendto(Descriptor, static_cast<const char*>(msg), int(len), 0, toAddr.SockAddr(), toAddr.Size());
+ if (ret < 0)
+ return -LastSocketError();
+
+ return ret;
+ }
+
+ ssize_t
+ TDatagramSocket::RecvFrom(void* buf, size_t len, TAddress& fromAddr) const {
+ socklen_t fromSize = sizeof(::sockaddr_in6);
+ const auto ret = ::recvfrom(Descriptor, static_cast<char*>(buf), int(len), 0, fromAddr.SockAddr(), &fromSize);
+ if (ret < 0)
+ return -LastSocketError();
+
+ return ret;
+ }
+
+
+ // deleter for SSL objects
+ struct TDeleter {
+ void operator ()(BIO *bio) const {
+ BIO_free(bio);
+ }
+
+ void operator ()(X509 *x509) const {
+ X509_free(x509);
+ }
+
+ void operator ()(RSA *rsa) const {
+ RSA_free(rsa);
+ }
+
+ void operator ()(SSL_CTX *ctx) const {
+ SSL_CTX_free(ctx);
+ }
+ };
+
+ class TSecureSocketContext::TImpl {
+ std::unique_ptr<SSL_CTX, TDeleter> Ctx;
+
+ public:
+ TImpl(const TString& certificate, const TString& privateKey, const TString& caFilePath,
+ const TString& ciphers) {
+ int ret;
+ InitOpenSSL();
+#if OPENSSL_VERSION_NUMBER < 0x10100000L
+ Ctx.reset(SSL_CTX_new(TLSv1_2_method()));
+ Y_VERIFY(Ctx, "SSL_CTX_new() failed");
+#else
+ Ctx.reset(SSL_CTX_new(TLS_method()));
+ Y_VERIFY(Ctx, "SSL_CTX_new() failed");
+ ret = SSL_CTX_set_min_proto_version(Ctx.get(), TLS1_2_VERSION);
+ Y_VERIFY(ret == 1, "failed to set min proto version");
+ ret = SSL_CTX_set_max_proto_version(Ctx.get(), TLS1_2_VERSION);
+ Y_VERIFY(ret == 1, "failed to set max proto version");
+#endif
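+            // both branches above pin the protocol to exactly TLS 1.2: either via the
+            // version-specific method (OpenSSL < 1.1.0) or by clamping min and max versions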
+ SSL_CTX_set_verify(Ctx.get(), SSL_VERIFY_PEER | SSL_VERIFY_FAIL_IF_NO_PEER_CERT, &Verify);
+ SSL_CTX_set_mode(*this, SSL_MODE_ENABLE_PARTIAL_WRITE | SSL_MODE_ACCEPT_MOVING_WRITE_BUFFER);
+
+ // apply certificates in SSL context
+ if (certificate) {
+ std::unique_ptr<BIO, TDeleter> bio(BIO_new_mem_buf(certificate.data(), certificate.size()));
+ Y_VERIFY(bio);
+
+ // first certificate in the chain is expected to be a leaf
+ std::unique_ptr<X509, TDeleter> cert(PEM_read_bio_X509(bio.get(), nullptr, nullptr, nullptr));
+ Y_VERIFY(cert, "failed to parse certificate");
+ ret = SSL_CTX_use_certificate(Ctx.get(), cert.get());
+ Y_VERIFY(ret == 1);
+
+                // load any additional certificates of the chain
+                while (true) {
+ X509 *ca = PEM_read_bio_X509(bio.get(), nullptr, nullptr, nullptr);
+ if (ca == nullptr) {
+ break;
+ }
+ ret = SSL_CTX_add0_chain_cert(Ctx.get(), ca);
+ Y_VERIFY(ret == 1);
+                    // SSL_CTX_add0_chain_cert takes ownership of the certificate on success, so it must not be freed here
+ }
+ }
+ if (privateKey) {
+ std::unique_ptr<BIO, TDeleter> bio(BIO_new_mem_buf(privateKey.data(), privateKey.size()));
+ Y_VERIFY(bio);
+ std::unique_ptr<RSA, TDeleter> pkey(PEM_read_bio_RSAPrivateKey(bio.get(), nullptr, nullptr, nullptr));
+ Y_VERIFY(pkey);
+ ret = SSL_CTX_use_RSAPrivateKey(Ctx.get(), pkey.get());
+ Y_VERIFY(ret == 1);
+ }
+ if (caFilePath) {
+ ret = SSL_CTX_load_verify_locations(Ctx.get(), caFilePath.data(), nullptr);
+ Y_VERIFY(ret == 1);
+ }
+
+ int success = SSL_CTX_set_cipher_list(Ctx.get(), ciphers ? ciphers.data() : "AES128-GCM-SHA256");
+ Y_VERIFY(success, "failed to set cipher list");
+ }
+
+ operator SSL_CTX*() const {
+ return Ctx.get();
+ }
+
+ static int GetExIndex() {
+ static int index = SSL_get_ex_new_index(0, nullptr, nullptr, nullptr, nullptr);
+ return index;
+ }
+
+ private:
+ static int Verify(int preverify, X509_STORE_CTX *ctx) {
+ if (!preverify) {
+ X509 *badCert = X509_STORE_CTX_get_current_cert(ctx);
+ int err = X509_STORE_CTX_get_error(ctx);
+ int depth = X509_STORE_CTX_get_error_depth(ctx);
+ SSL *ssl = static_cast<SSL*>(X509_STORE_CTX_get_ex_data(ctx, SSL_get_ex_data_X509_STORE_CTX_idx()));
+ TString *errp = static_cast<TString*>(SSL_get_ex_data(ssl, GetExIndex()));
+ char buffer[1024];
+ X509_NAME_oneline(X509_get_subject_name(badCert), buffer, sizeof(buffer));
+ TStringBuilder s;
+ s << "Error during certificate validation"
+ << " error# " << X509_verify_cert_error_string(err)
+ << " depth# " << depth
+ << " cert# " << buffer;
+ if (err == X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT) {
+ X509_NAME_oneline(X509_get_issuer_name(badCert), buffer, sizeof(buffer));
+ s << " issuer# " << buffer;
+ }
+ *errp = s;
+ }
+ return preverify;
+ }
+ };
+
+ TSecureSocketContext::TSecureSocketContext(const TString& certificate, const TString& privateKey,
+ const TString& caFilePath, const TString& ciphers)
+ : Impl(new TImpl(certificate, privateKey, caFilePath, ciphers))
+ {}
+
+ TSecureSocketContext::~TSecureSocketContext()
+ {}
+
+ class TSecureSocket::TImpl {
+ SSL *Ssl;
+ TString ErrorDescription;
+ bool WantRead_ = false;
+ bool WantWrite_ = false;
+
+ public:
+ TImpl(SSL_CTX *ctx, int fd)
+ : Ssl(SSL_new(ctx))
+ {
+ Y_VERIFY(Ssl, "SSL_new() failed");
+ SSL_set_fd(Ssl, fd);
+ SSL_set_ex_data(Ssl, TSecureSocketContext::TImpl::GetExIndex(), &ErrorDescription);
+ }
+
+ ~TImpl() {
+ SSL_free(Ssl);
+ }
+
+ TString GetErrorStack() {
+ if (ErrorDescription) {
+ return ErrorDescription;
+ }
+ std::unique_ptr<BIO, int(*)(BIO*)> mem(BIO_new(BIO_s_mem()), BIO_free);
+ ERR_print_errors(mem.get());
+ char *p = nullptr;
+ auto len = BIO_get_mem_data(mem.get(), &p);
+ return TString(p, len);
+ }
+
+ EStatus ConvertResult(int res, TString& err) {
+ switch (res) {
+ case SSL_ERROR_NONE:
+ return EStatus::SUCCESS;
+
+ case SSL_ERROR_WANT_READ:
+ return EStatus::WANT_READ;
+
+ case SSL_ERROR_WANT_WRITE:
+ return EStatus::WANT_WRITE;
+
+ case SSL_ERROR_SYSCALL:
+ err = TStringBuilder() << "syscall error: " << strerror(LastSocketError()) << ": " << GetErrorStack();
+ break;
+
+ case SSL_ERROR_ZERO_RETURN:
+ err = "TLS negotiation failed";
+ break;
+
+ case SSL_ERROR_SSL:
+ err = "SSL error: " + GetErrorStack();
+ break;
+
+ default:
+ err = "unknown OpenSSL error";
+ break;
+ }
+ return EStatus::ERROR;
+ }
+
+ enum EConnectState {
+ CONNECT,
+ SHUTDOWN,
+ READ,
+ } ConnectState = EConnectState::CONNECT;
+
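+        // Establish() is a resumable three-step state machine. CONNECT performs the TLS
+        // handshake (SSL_accept/SSL_connect). In authOnly mode the TLS layer is used only to
+        // authenticate the peer: after a successful handshake we SHUTDOWN the TLS session and
+        // then READ until the peer's close_notify arrives, after which traffic continues in
+        // plaintext on the same socket. Each step may return WANT_READ/WANT_WRITE and be
+        // re-entered later.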
+ EStatus Establish(bool server, bool authOnly, TString& err) {
+ switch (ConnectState) {
+ case EConnectState::CONNECT: {
+ auto callback = server ? SSL_accept : SSL_connect;
+ const EStatus status = ConvertResult(SSL_get_error(Ssl, callback(Ssl)), err);
+ if (status != EStatus::SUCCESS || !authOnly) {
+ return status;
+ }
+ ConnectState = EConnectState::SHUTDOWN;
+ [[fallthrough]];
+ }
+
+ case EConnectState::SHUTDOWN: {
+ const int res = SSL_shutdown(Ssl);
+ if (res == 1) {
+ return EStatus::SUCCESS;
+ } else if (res != 0) {
+ return ConvertResult(SSL_get_error(Ssl, res), err);
+ }
+ ConnectState = EConnectState::READ;
+ [[fallthrough]];
+ }
+
+ case EConnectState::READ: {
+ char data[256];
+ size_t numRead = 0;
+ const int res = SSL_get_error(Ssl, SSL_read_ex(Ssl, data, sizeof(data), &numRead));
+ if (res == SSL_ERROR_ZERO_RETURN) {
+ return EStatus::SUCCESS;
+ } else if (res != SSL_ERROR_NONE) {
+ return ConvertResult(res, err);
+ } else if (numRead) {
+ err = "non-zero return from SSL_read_ex: " + ToString(numRead);
+ return EStatus::ERROR;
+ } else {
+ return EStatus::SUCCESS;
+ }
+ }
+ }
+ Y_FAIL();
+ }
+
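+        // A send/recv that returns -EAGAIN must be retried with the same buffer and length;
+        // BlockedSend/BlockedReceive remember the blocked arguments so that Y_VERIFY can
+        // enforce this retry contract on the next call.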
+ std::optional<std::pair<const void*, size_t>> BlockedSend;
+
+ ssize_t Send(const void* msg, size_t len, TString *err) {
+ Y_VERIFY(!BlockedSend || *BlockedSend == std::make_pair(msg, len));
+ const ssize_t res = Operate(msg, len, &SSL_write_ex, err);
+ if (res == -EAGAIN) {
+ BlockedSend.emplace(msg, len);
+ } else {
+ BlockedSend.reset();
+ }
+ return res;
+ }
+
+ std::optional<std::pair<void*, size_t>> BlockedReceive;
+
+ ssize_t Recv(void* msg, size_t len, TString *err) {
+ Y_VERIFY(!BlockedReceive || *BlockedReceive == std::make_pair(msg, len));
+ const ssize_t res = Operate(msg, len, &SSL_read_ex, err);
+ if (res == -EAGAIN) {
+ BlockedReceive.emplace(msg, len);
+ } else {
+ BlockedReceive.reset();
+ }
+ return res;
+ }
+
+ TString GetCipherName() const {
+ return SSL_get_cipher_name(Ssl);
+ }
+
+ int GetCipherBits() const {
+ return SSL_get_cipher_bits(Ssl, nullptr);
+ }
+
+ TString GetProtocolName() const {
+ return SSL_get_cipher_version(Ssl);
+ }
+
+ TString GetPeerCommonName() const {
+ TString res;
+ if (X509 *cert = SSL_get_peer_certificate(Ssl)) {
+ char buffer[256];
+ memset(buffer, 0, sizeof(buffer));
+ if (X509_NAME *name = X509_get_subject_name(cert)) {
+ X509_NAME_get_text_by_NID(name, NID_commonName, buffer, sizeof(buffer));
+ }
+ X509_free(cert);
+ res = TString(buffer, strnlen(buffer, sizeof(buffer)));
+ }
+ return res;
+ }
+
+ bool WantRead() const {
+ return WantRead_;
+ }
+
+ bool WantWrite() const {
+ return WantWrite_;
+ }
+
+ private:
+ template<typename TBuffer, typename TOp>
+ ssize_t Operate(TBuffer* buffer, size_t len, TOp&& op, TString *err) {
+ WantRead_ = WantWrite_ = false;
+ size_t processed = 0;
+ int ret = op(Ssl, buffer, len, &processed);
+ if (ret == 1) {
+ return processed;
+ }
+ switch (const int status = SSL_get_error(Ssl, ret)) {
+ case SSL_ERROR_ZERO_RETURN:
+ return 0;
+
+ case SSL_ERROR_WANT_READ:
+ WantRead_ = true;
+ return -EAGAIN;
+
+ case SSL_ERROR_WANT_WRITE:
+ WantWrite_ = true;
+ return -EAGAIN;
+
+ case SSL_ERROR_SYSCALL:
+ return -LastSocketError();
+
+ case SSL_ERROR_SSL:
+ if (err) {
+ *err = GetErrorStack();
+ }
+ return -EPROTO;
+
+ default:
+ Y_FAIL("unexpected SSL_get_error() status# %d", status);
+ }
+ }
+ };
+
+ TSecureSocket::TSecureSocket(TStreamSocket& socket, TSecureSocketContext::TPtr context)
+ : TStreamSocket(socket.ReleaseDescriptor())
+ , Context(std::move(context))
+ , Impl(new TImpl(*Context->Impl, Descriptor))
+ {}
+
+ TSecureSocket::~TSecureSocket()
+ {}
+
+ TSecureSocket::EStatus TSecureSocket::Establish(bool server, bool authOnly, TString& err) const {
+ return Impl->Establish(server, authOnly, err);
+ }
+
+ TIntrusivePtr<TStreamSocket> TSecureSocket::Detach() {
+ return MakeIntrusive<TStreamSocket>(ReleaseDescriptor());
+ }
+
+ ssize_t TSecureSocket::Send(const void* msg, size_t len, TString *err) const {
+ return Impl->Send(msg, len, err);
+ }
+
+ ssize_t TSecureSocket::Recv(void* msg, size_t len, TString *err) const {
+ return Impl->Recv(msg, len, err);
+ }
+
+ ssize_t TSecureSocket::WriteV(const struct iovec* /*iov*/, int /*iovcnt*/) const {
+ Y_FAIL("unsupported on SSL sockets");
+ }
+
+ ssize_t TSecureSocket::ReadV(const struct iovec* /*iov*/, int /*iovcnt*/) const {
+ Y_FAIL("unsupported on SSL sockets");
+ }
+
+ TString TSecureSocket::GetCipherName() const {
+ return Impl->GetCipherName();
+ }
+
+ int TSecureSocket::GetCipherBits() const {
+ return Impl->GetCipherBits();
+ }
+
+ TString TSecureSocket::GetProtocolName() const {
+ return Impl->GetProtocolName();
+ }
+
+ TString TSecureSocket::GetPeerCommonName() const {
+ return Impl->GetPeerCommonName();
+ }
+
+ bool TSecureSocket::WantRead() const {
+ return Impl->WantRead();
+ }
+
+ bool TSecureSocket::WantWrite() const {
+ return Impl->WantWrite();
+ }
+
+}
diff --git a/library/cpp/actors/interconnect/interconnect_stream.h b/library/cpp/actors/interconnect/interconnect_stream.h
new file mode 100644
index 0000000000..074adc6e74
--- /dev/null
+++ b/library/cpp/actors/interconnect/interconnect_stream.h
@@ -0,0 +1,131 @@
+#pragma once
+
+#include <util/generic/string.h>
+#include <util/generic/noncopyable.h>
+#include <util/network/address.h>
+#include <util/network/init.h>
+#include <util/system/defaults.h>
+
+#include "poller.h"
+
+#include "interconnect_address.h"
+
+#include <memory>
+
+#include <sys/uio.h>
+
+namespace NInterconnect {
+ class TSocket: public NActors::TSharedDescriptor, public TNonCopyable {
+ protected:
+ TSocket(SOCKET fd);
+
+ virtual ~TSocket() override;
+
+ SOCKET Descriptor;
+
+ virtual int GetDescriptor() override;
+
+ private:
+ friend class TSecureSocket;
+
+ SOCKET ReleaseDescriptor() {
+ return std::exchange(Descriptor, INVALID_SOCKET);
+ }
+
+ public:
+ operator SOCKET() const {
+ return Descriptor;
+ }
+
+ int Bind(const TAddress& addr) const;
+ int Shutdown(int how) const;
+ int GetConnectStatus() const;
+ };
+
+ class TStreamSocket: public TSocket {
+ public:
+ TStreamSocket(SOCKET fd);
+
+ static TIntrusivePtr<TStreamSocket> Make(int domain);
+
+ virtual ssize_t Send(const void* msg, size_t len, TString *err = nullptr) const;
+ virtual ssize_t Recv(void* buf, size_t len, TString *err = nullptr) const;
+
+ virtual ssize_t WriteV(const struct iovec* iov, int iovcnt) const;
+ virtual ssize_t ReadV(const struct iovec* iov, int iovcnt) const;
+
+ int Connect(const TAddress& addr) const;
+ int Connect(const NAddr::IRemoteAddr* addr) const;
+ int Listen(int backlog) const;
+ int Accept(TAddress& acceptedAddr) const;
+
+ ssize_t GetUnsentQueueSize() const;
+
+ void SetSendBufferSize(i32 len) const;
+ ui32 GetSendBufferSize() const;
+ };
+
+ class TSecureSocketContext {
+ class TImpl;
+ THolder<TImpl> Impl;
+
+ friend class TSecureSocket;
+
+ public:
+ TSecureSocketContext(const TString& certificate, const TString& privateKey, const TString& caFilePath,
+ const TString& ciphers);
+ ~TSecureSocketContext();
+
+ public:
+ using TPtr = std::shared_ptr<TSecureSocketContext>;
+ };
+
+ class TSecureSocket : public TStreamSocket {
+ TSecureSocketContext::TPtr Context;
+
+ class TImpl;
+ THolder<TImpl> Impl;
+
+ public:
+ enum class EStatus {
+ SUCCESS,
+ ERROR,
+ WANT_READ,
+ WANT_WRITE,
+ };
+
+ public:
+ TSecureSocket(TStreamSocket& socket, TSecureSocketContext::TPtr context);
+ ~TSecureSocket();
+
+ EStatus Establish(bool server, bool authOnly, TString& err) const;
+ TIntrusivePtr<TStreamSocket> Detach();
+
+ ssize_t Send(const void* msg, size_t len, TString *err) const override;
+ ssize_t Recv(void* msg, size_t len, TString *err) const override;
+
+ ssize_t WriteV(const struct iovec* iov, int iovcnt) const override;
+ ssize_t ReadV(const struct iovec* iov, int iovcnt) const override;
+
+ TString GetCipherName() const;
+ int GetCipherBits() const;
+ TString GetProtocolName() const;
+ TString GetPeerCommonName() const;
+
+ bool WantRead() const;
+ bool WantWrite() const;
+ };
+
+ class TDatagramSocket: public TSocket {
+ public:
+ typedef std::shared_ptr<TDatagramSocket> TPtr;
+
+ TDatagramSocket(SOCKET fd);
+
+ static TPtr Make(int domain);
+
+ ssize_t SendTo(const void* msg, size_t len, const TAddress& toAddr) const;
+ ssize_t RecvFrom(void* buf, size_t len, TAddress& fromAddr) const;
+ };
+
+}
diff --git a/library/cpp/actors/interconnect/interconnect_tcp_input_session.cpp b/library/cpp/actors/interconnect/interconnect_tcp_input_session.cpp
new file mode 100644
index 0000000000..0abe9fe659
--- /dev/null
+++ b/library/cpp/actors/interconnect/interconnect_tcp_input_session.cpp
@@ -0,0 +1,476 @@
+#include "interconnect_tcp_session.h"
+#include "interconnect_tcp_proxy.h"
+#include <library/cpp/actors/core/probes.h>
+#include <library/cpp/actors/util/datetime.h>
+
+namespace NActors {
+ LWTRACE_USING(ACTORLIB_PROVIDER);
+
+ TInputSessionTCP::TInputSessionTCP(const TActorId& sessionId, TIntrusivePtr<NInterconnect::TStreamSocket> socket,
+ TIntrusivePtr<TReceiveContext> context, TInterconnectProxyCommon::TPtr common,
+ std::shared_ptr<IInterconnectMetrics> metrics, ui32 nodeId, ui64 lastConfirmed,
+ TDuration deadPeerTimeout, TSessionParams params)
+ : SessionId(sessionId)
+ , Socket(std::move(socket))
+ , Context(std::move(context))
+ , Common(std::move(common))
+ , NodeId(nodeId)
+ , Params(std::move(params))
+ , ConfirmedByInput(lastConfirmed)
+ , Metrics(std::move(metrics))
+ , DeadPeerTimeout(deadPeerTimeout)
+ {
+ Y_VERIFY(Context);
+ Y_VERIFY(Socket);
+ Y_VERIFY(SessionId);
+
+ AtomicSet(Context->PacketsReadFromSocket, 0);
+
+ Metrics->SetClockSkewMicrosec(0);
+
+ Context->UpdateState = EUpdateState::NONE;
+
+ // ensure that we do not spawn new session while the previous one is still alive
+ TAtomicBase sessions = AtomicIncrement(Context->NumInputSessions);
+ Y_VERIFY(sessions == 1, "sessions# %" PRIu64, ui64(sessions));
+ }
+
+ void TInputSessionTCP::Bootstrap() {
+ SetPrefix(Sprintf("InputSession %s [node %" PRIu32 "]", SelfId().ToString().data(), NodeId));
+ Become(&TThis::WorkingState, DeadPeerTimeout, new TEvCheckDeadPeer);
+ LOG_DEBUG_IC_SESSION("ICIS01", "InputSession created");
+ LastReceiveTimestamp = TActivationContext::Now();
+ ReceiveData();
+ }
+
+ void TInputSessionTCP::CloseInputSession() {
+ CloseInputSessionRequested = true;
+ ReceiveData();
+ }
+
+ void TInputSessionTCP::Handle(TEvPollerReady::TPtr ev) {
+ if (Context->ReadPending) {
+ Metrics->IncUsefulReadWakeups();
+ } else if (!ev->Cookie) {
+ Metrics->IncSpuriousReadWakeups();
+ }
+ Context->ReadPending = false;
+ ReceiveData();
+ if (Params.Encryption && Context->WriteBlockedByFullSendBuffer && !ev->Cookie) {
+ Send(SessionId, ev->Release().Release(), 0, 1);
+ }
+ }
+
+ void TInputSessionTCP::Handle(TEvPollerRegisterResult::TPtr ev) {
+ PollerToken = std::move(ev->Get()->PollerToken);
+ ReceiveData();
+ }
+
+ void TInputSessionTCP::HandleResumeReceiveData() {
+ ReceiveData();
+ }
+
+ void TInputSessionTCP::ReceiveData() {
+ TTimeLimit limit(GetMaxCyclesPerEvent());
+ ui64 numDataBytes = 0;
+ const size_t headerLen = Params.UseModernFrame ? sizeof(TTcpPacketHeader_v2) : sizeof(TTcpPacketHeader_v1);
+
+ LOG_DEBUG_IC_SESSION("ICIS02", "ReceiveData called");
+
+ for (int iteration = 0; Socket; ++iteration) {
+ if (iteration && limit.CheckExceeded()) {
+ // we have hit processing time limit for this message, send notification to resume processing a bit later
+ Send(SelfId(), new TEvResumeReceiveData);
+ break;
+ }
+
+ switch (State) {
+ case EState::HEADER:
+ if (IncomingData.GetSize() < headerLen) {
+ break;
+ } else {
+ ProcessHeader(headerLen);
+ }
+ continue;
+
+ case EState::PAYLOAD:
+ if (!IncomingData) {
+ break;
+ } else {
+ ProcessPayload(numDataBytes);
+ }
+ continue;
+ }
+
+ // if we have reached this point, it means that we do not have enough data in read buffer; try to obtain some
+ if (!ReadMore()) {
+ // we have no data from socket, so we have some free time to spend -- preallocate buffers using this time
+ PreallocateBuffers();
+ break;
+ }
+ }
+
+ // calculate ping time
+ auto it = std::min_element(PingQ.begin(), PingQ.end());
+ const TDuration ping = it != PingQ.end() ? *it : TDuration::Zero();
+
+ // send update to main session actor if something valuable has changed
+ if (!UpdateFromInputSession) {
+ UpdateFromInputSession = MakeHolder<TEvUpdateFromInputSession>(ConfirmedByInput, numDataBytes, ping);
+ } else {
+ Y_VERIFY(ConfirmedByInput >= UpdateFromInputSession->ConfirmedByInput);
+ UpdateFromInputSession->ConfirmedByInput = ConfirmedByInput;
+ UpdateFromInputSession->NumDataBytes += numDataBytes;
+ UpdateFromInputSession->Ping = Min(UpdateFromInputSession->Ping, ping);
+ }
+
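+        // Publish the update via a small lock-free protocol on Context->UpdateState: if no
+        // update is inflight we send one right away; if one is already inflight we only mark
+        // a pending update, and the session actor will kick us with TEvConfirmUpdate later.
+        // compare_exchange_weak may fail spuriously, hence the retry loop.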
+ for (;;) {
+ EUpdateState state = Context->UpdateState;
+ EUpdateState next;
+
+ // calculate next state
+ switch (state) {
+ case EUpdateState::NONE:
+ case EUpdateState::CONFIRMING:
+                    // no update message is inflight to the session actor; we will issue one below
+ next = EUpdateState::INFLIGHT;
+ break;
+
+ case EUpdateState::INFLIGHT:
+ case EUpdateState::INFLIGHT_AND_PENDING:
+                    // an update message is already inflight, so we keep ours pending; the session
+                    // actor will issue TEvConfirmUpdate to kick processing
+ next = EUpdateState::INFLIGHT_AND_PENDING;
+ break;
+ }
+
+ if (Context->UpdateState.compare_exchange_weak(state, next)) {
+ switch (next) {
+ case EUpdateState::INFLIGHT:
+ Send(SessionId, UpdateFromInputSession.Release());
+ break;
+
+ case EUpdateState::INFLIGHT_AND_PENDING:
+ Y_VERIFY(UpdateFromInputSession);
+ break;
+
+ default:
+ Y_FAIL("unexpected state");
+ }
+ break;
+ }
+ }
+ }
+
+ void TInputSessionTCP::ProcessHeader(size_t headerLen) {
+ const bool success = IncomingData.ExtractFrontPlain(Header.Data, headerLen);
+ Y_VERIFY(success);
+ if (Params.UseModernFrame) {
+ PayloadSize = Header.v2.PayloadLength;
+ HeaderSerial = Header.v2.Serial;
+ HeaderConfirm = Header.v2.Confirm;
+ if (!Params.Encryption) {
+ ChecksumExpected = std::exchange(Header.v2.Checksum, 0);
+ Checksum = Crc32cExtendMSanCompatible(0, &Header.v2, sizeof(Header.v2)); // start calculating checksum now
+ if (!PayloadSize && Checksum != ChecksumExpected) {
+ LOG_ERROR_IC_SESSION("ICIS10", "payload checksum error");
+ return ReestablishConnection(TDisconnectReason::ChecksumError());
+ }
+ }
+ } else if (!Header.v1.Check()) {
+ LOG_ERROR_IC_SESSION("ICIS03", "header checksum error");
+ return ReestablishConnection(TDisconnectReason::ChecksumError());
+ } else {
+ PayloadSize = Header.v1.DataSize;
+ HeaderSerial = Header.v1.Serial;
+ HeaderConfirm = Header.v1.Confirm;
+ ChecksumExpected = Header.v1.PayloadCRC32;
+ Checksum = 0;
+ }
+ if (PayloadSize >= 65536) {
+ LOG_CRIT_IC_SESSION("ICIS07", "payload is way too big");
+ return DestroySession(TDisconnectReason::FormatError());
+ }
+ if (ConfirmedByInput < HeaderConfirm) {
+ ConfirmedByInput = HeaderConfirm;
+ if (AtomicGet(Context->ControlPacketId) <= HeaderConfirm && !NewPingProtocol) {
+ ui64 sendTime = AtomicGet(Context->ControlPacketSendTimer);
+ TDuration duration = CyclesToDuration(GetCycleCountFast() - sendTime);
+ const auto durationUs = duration.MicroSeconds();
+ Metrics->UpdateLegacyPingTimeHist(durationUs);
+ PingQ.push_back(duration);
+ if (PingQ.size() > 16) {
+ PingQ.pop_front();
+ }
+ AtomicSet(Context->ControlPacketId, 0ULL);
+ }
+ }
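+        // A packet without payload carries a control word in its serial field: the high bits
+        // select a ping request, a ping response, or a wall-clock sample, and the remaining
+        // bits carry the argument.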
+ if (PayloadSize) {
+ const ui64 expected = Context->GetLastProcessedPacketSerial() + 1;
+ if (HeaderSerial == 0 || HeaderSerial > expected) {
+ LOG_CRIT_IC_SESSION("ICIS06", "packet serial %" PRIu64 ", but %" PRIu64 " expected", HeaderSerial, expected);
+ return DestroySession(TDisconnectReason::FormatError());
+ }
+ IgnorePayload = HeaderSerial != expected;
+ State = EState::PAYLOAD;
+ } else if (HeaderSerial & TTcpPacketBuf::PingRequestMask) {
+ Send(SessionId, new TEvProcessPingRequest(HeaderSerial & ~TTcpPacketBuf::PingRequestMask));
+ } else if (HeaderSerial & TTcpPacketBuf::PingResponseMask) {
+ const ui64 sent = HeaderSerial & ~TTcpPacketBuf::PingResponseMask;
+ const ui64 received = GetCycleCountFast();
+ HandlePingResponse(CyclesToDuration(received - sent));
+ } else if (HeaderSerial & TTcpPacketBuf::ClockMask) {
+ HandleClock(TInstant::MicroSeconds(HeaderSerial & ~TTcpPacketBuf::ClockMask));
+ }
+ }
+
+ void TInputSessionTCP::ProcessPayload(ui64& numDataBytes) {
+ const size_t numBytes = Min(PayloadSize, IncomingData.GetSize());
+ IncomingData.ExtractFront(numBytes, &Payload);
+ numDataBytes += numBytes;
+ PayloadSize -= numBytes;
+ if (PayloadSize) {
+ return; // there is still some data to receive in the Payload rope
+ }
+ State = EState::HEADER; // we'll continue with header next time
+ if (!Params.UseModernFrame || !Params.Encryption) { // see if we are checksumming packet body
+ for (const auto&& [data, size] : Payload) {
+ Checksum = Crc32cExtendMSanCompatible(Checksum, data, size);
+ }
+ if (Checksum != ChecksumExpected) { // validate payload checksum
+ LOG_ERROR_IC_SESSION("ICIS04", "payload checksum error");
+ return ReestablishConnection(TDisconnectReason::ChecksumError());
+ }
+ }
+ if (Y_UNLIKELY(IgnorePayload)) {
+ return;
+ }
+ if (!Context->AdvanceLastProcessedPacketSerial()) {
+ return DestroySession(TDisconnectReason::NewSession());
+ }
+
+ while (Payload && Socket) {
+ // extract channel part header from the payload stream
+ TChannelPart part;
+ if (!Payload.ExtractFrontPlain(&part, sizeof(part))) {
+ LOG_CRIT_IC_SESSION("ICIS14", "missing TChannelPart header in payload");
+ return DestroySession(TDisconnectReason::FormatError());
+ }
+ if (!part.Size) { // bogus frame
+ continue;
+ } else if (Payload.GetSize() < part.Size) {
+ LOG_CRIT_IC_SESSION("ICIS08", "payload format error ChannelPart# %s", part.ToString().data());
+ return DestroySession(TDisconnectReason::FormatError());
+ }
+
+ const ui16 channel = part.Channel & ~TChannelPart::LastPartFlag;
+ TRope *eventData = channel < Context->ChannelArray.size()
+ ? &Context->ChannelArray[channel]
+ : &Context->ChannelMap[channel];
+
+ Metrics->AddInputChannelsIncomingTraffic(channel, sizeof(part) + part.Size);
+
+ TEventDescr descr;
+ if (~part.Channel & TChannelPart::LastPartFlag) {
+ Payload.ExtractFront(part.Size, eventData);
+ } else if (part.Size != sizeof(descr)) {
+ LOG_CRIT_IC_SESSION("ICIS11", "incorrect last part of an event");
+ return DestroySession(TDisconnectReason::FormatError());
+ } else if (Payload.ExtractFrontPlain(&descr, sizeof(descr))) {
+ Metrics->IncInputChannelsIncomingEvents(channel);
+ ProcessEvent(*eventData, descr);
+ *eventData = TRope();
+ } else {
+ Y_FAIL();
+ }
+ }
+ }
+
+ void TInputSessionTCP::ProcessEvent(TRope& data, TEventDescr& descr) {
+ if (!Params.UseModernFrame || descr.Checksum) {
+ ui32 checksum = 0;
+ for (const auto&& [data, size] : data) {
+ checksum = Crc32cExtendMSanCompatible(checksum, data, size);
+ }
+ if (checksum != descr.Checksum) {
+ LOG_CRIT_IC_SESSION("ICIS05", "event checksum error");
+ return ReestablishConnection(TDisconnectReason::ChecksumError());
+ }
+ }
+ auto ev = std::make_unique<IEventHandle>(SessionId,
+ descr.Type,
+ descr.Flags & ~IEventHandle::FlagExtendedFormat,
+ descr.Recipient,
+ descr.Sender,
+ MakeIntrusive<TEventSerializedData>(std::move(data), bool(descr.Flags & IEventHandle::FlagExtendedFormat)),
+ descr.Cookie,
+ Params.PeerScopeId,
+ NWilson::TTraceId(descr.TraceId));
+ if (Common->EventFilter && !Common->EventFilter->CheckIncomingEvent(*ev, Common->LocalScopeId)) {
+ LOG_CRIT_IC_SESSION("ICIC03", "Event dropped due to scope error LocalScopeId# %s PeerScopeId# %s Type# 0x%08" PRIx32,
+ ScopeIdToString(Common->LocalScopeId).data(), ScopeIdToString(Params.PeerScopeId).data(), descr.Type);
+ ev.reset();
+ }
+ if (ev) {
+ TActivationContext::Send(ev.release());
+ }
+ }
+
+ void TInputSessionTCP::HandleConfirmUpdate() {
+ for (;;) {
+ switch (EUpdateState state = Context->UpdateState) {
+ case EUpdateState::NONE:
+ case EUpdateState::INFLIGHT:
+ case EUpdateState::INFLIGHT_AND_PENDING:
+                // the state may have changed concurrently since the confirm was issued; nothing to do here
+ return;
+
+ case EUpdateState::CONFIRMING:
+ Y_VERIFY(UpdateFromInputSession);
+ if (Context->UpdateState.compare_exchange_weak(state, EUpdateState::INFLIGHT)) {
+ Send(SessionId, UpdateFromInputSession.Release());
+ return;
+ }
+ }
+ }
+ }
+
+ bool TInputSessionTCP::ReadMore() {
+ PreallocateBuffers();
+
+ TStackVec<TIoVec, NumPreallocatedBuffers> buffs;
+ for (const auto& item : Buffers) {
+ TIoVec iov{item->GetBuffer(), item->GetCapacity()};
+ buffs.push_back(iov);
+ if (Params.Encryption) {
+ break; // do not put more than one buffer in queue to prevent using ReadV
+ }
+ }
+
+ const struct iovec* iovec = reinterpret_cast<const struct iovec*>(buffs.data());
+ int iovcnt = buffs.size();
+
+ ssize_t recvres = 0;
+ TString err;
+ LWPROBE_IF_TOO_LONG(SlowICReadFromSocket, ms) {
+ do {
+#ifndef _win_
+ recvres = iovcnt == 1 ? Socket->Recv(iovec->iov_base, iovec->iov_len, &err) : Socket->ReadV(iovec, iovcnt);
+#else
+ recvres = Socket->Recv(iovec[0].iov_base, iovec[0].iov_len, &err);
+#endif
+ Metrics->IncRecvSyscalls();
+ } while (recvres == -EINTR);
+ }
+
+ LOG_DEBUG_IC_SESSION("ICIS12", "ReadMore recvres# %zd iovcnt# %d err# %s", recvres, iovcnt, err.data());
+
+ if (recvres <= 0 || CloseInputSessionRequested) {
+ if ((-recvres != EAGAIN && -recvres != EWOULDBLOCK) || CloseInputSessionRequested) {
+ TString message = CloseInputSessionRequested ? "connection closed by debug command"
+ : recvres == 0 ? "connection closed by peer"
+ : err ? err
+ : Sprintf("socket: %s", strerror(-recvres));
+ LOG_NOTICE_NET(NodeId, "%s", message.data());
+ ReestablishConnection(CloseInputSessionRequested ? TDisconnectReason::Debug() :
+ recvres == 0 ? TDisconnectReason::EndOfStream() : TDisconnectReason::FromErrno(-recvres));
+ } else if (PollerToken && !std::exchange(Context->ReadPending, true)) {
+ if (Params.Encryption) {
+ auto *secure = static_cast<NInterconnect::TSecureSocket*>(Socket.Get());
+ const bool wantRead = secure->WantRead(), wantWrite = secure->WantWrite();
+ Y_VERIFY_DEBUG(wantRead || wantWrite);
+ PollerToken->Request(wantRead, wantWrite);
+ } else {
+ PollerToken->Request(true, false);
+ }
+ }
+ return false;
+ }
+
+ Y_VERIFY(recvres > 0);
+ Metrics->AddTotalBytesRead(recvres);
+ TDeque<TIntrusivePtr<TRopeAlignedBuffer>>::iterator it;
+ for (it = Buffers.begin(); recvres; ++it) {
+ Y_VERIFY(it != Buffers.end());
+ const size_t bytesFromFrontBuffer = Min<size_t>(recvres, (*it)->GetCapacity());
+ (*it)->AdjustSize(bytesFromFrontBuffer);
+ IncomingData.Insert(IncomingData.End(), TRope(std::move(*it)));
+ recvres -= bytesFromFrontBuffer;
+ }
+ Buffers.erase(Buffers.begin(), it);
+
+ LastReceiveTimestamp = TActivationContext::Now();
+
+ return true;
+ }
+
+ void TInputSessionTCP::PreallocateBuffers() {
+        // ensure that we have exactly "target" buffers in the queue
+ LWPROBE_IF_TOO_LONG(SlowICReadLoopAdjustSize, ms) {
+ const ui32 target = Params.Encryption ? 1 : NumPreallocatedBuffers;
+ while (Buffers.size() < target) {
+ Buffers.emplace_back(TRopeAlignedBuffer::Allocate(sizeof(TTcpPacketBuf)));
+ }
+ }
+ }
+
+ void TInputSessionTCP::ReestablishConnection(TDisconnectReason reason) {
+ LOG_DEBUG_IC_SESSION("ICIS09", "ReestablishConnection, reason# %s", reason.ToString().data());
+ AtomicDecrement(Context->NumInputSessions);
+ Send(SessionId, new TEvSocketDisconnect(std::move(reason)));
+ PassAway();
+ Socket.Reset();
+ }
+
+ void TInputSessionTCP::DestroySession(TDisconnectReason reason) {
+ LOG_DEBUG_IC_SESSION("ICIS13", "DestroySession, reason# %s", reason.ToString().data());
+ AtomicDecrement(Context->NumInputSessions);
+ Send(SessionId, TInterconnectSessionTCP::NewEvTerminate(std::move(reason)));
+ PassAway();
+ Socket.Reset();
+ }
+
+ void TInputSessionTCP::HandleCheckDeadPeer() {
+ const TInstant now = TActivationContext::Now();
+ if (now >= LastReceiveTimestamp + DeadPeerTimeout) {
+ ReceiveData();
+ if (Socket && now >= LastReceiveTimestamp + DeadPeerTimeout) {
+ // nothing has changed, terminate session
+ DestroySession(TDisconnectReason::DeadPeer());
+ }
+ }
+ Schedule(LastReceiveTimestamp + DeadPeerTimeout - now, new TEvCheckDeadPeer);
+ }
+
+ void TInputSessionTCP::HandlePingResponse(TDuration passed) {
+ PingQ.push_back(passed);
+ if (PingQ.size() > 16) {
+ PingQ.pop_front();
+ }
+ const TDuration ping = *std::min_element(PingQ.begin(), PingQ.end());
+ const auto pingUs = ping.MicroSeconds();
+ Context->PingRTT_us = pingUs;
+ NewPingProtocol = true;
+ Metrics->UpdateLegacyPingTimeHist(pingUs);
+ }
+
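+    // Clock skew estimation: the remote wall-clock sample is advanced by half the measured
+    // ping RTT, the signed difference to the local clock is pushed into a 16-entry window,
+    // and the sample with the smallest absolute value is used as the skew estimate, which
+    // filters out samples inflated by transient network delays.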
+ void TInputSessionTCP::HandleClock(TInstant clock) {
+ const TInstant here = TInstant::Now(); // wall clock
+ const TInstant remote = clock + TDuration::MicroSeconds(Context->PingRTT_us / 2);
+ i64 skew = remote.MicroSeconds() - here.MicroSeconds();
+ SkewQ.push_back(skew);
+ if (SkewQ.size() > 16) {
+ SkewQ.pop_front();
+ }
+ i64 clockSkew = SkewQ.front();
+ for (i64 skew : SkewQ) {
+            if (std::abs(skew) < std::abs(clockSkew)) {
+ clockSkew = skew;
+ }
+ }
+ Context->ClockSkew_us = clockSkew;
+ Metrics->SetClockSkewMicrosec(clockSkew);
+ }
+
+
+}
diff --git a/library/cpp/actors/interconnect/interconnect_tcp_proxy.cpp b/library/cpp/actors/interconnect/interconnect_tcp_proxy.cpp
new file mode 100644
index 0000000000..7e2d8ccb94
--- /dev/null
+++ b/library/cpp/actors/interconnect/interconnect_tcp_proxy.cpp
@@ -0,0 +1,936 @@
+#include "interconnect_tcp_proxy.h"
+#include "interconnect_handshake.h"
+#include "interconnect_tcp_session.h"
+#include <library/cpp/actors/core/log.h>
+#include <library/cpp/actors/protos/services_common.pb.h>
+#include <library/cpp/monlib/service/pages/templates.h>
+#include <util/system/getpid.h>
+
+namespace NActors {
+ static constexpr TDuration GetNodeRequestTimeout = TDuration::Seconds(5);
+
+ static constexpr TDuration FirstErrorSleep = TDuration::MilliSeconds(10);
+ static constexpr TDuration MaxErrorSleep = TDuration::Seconds(10);
+ static constexpr ui32 SleepRetryMultiplier = 4;
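+    // After a conclusive handshake failure the proxy backs off before retrying: the sleep is
+    // intended to start at FirstErrorSleep and grow by SleepRetryMultiplier after each failure,
+    // capped at MaxErrorSleep (see the transient-failure handling below).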
+
+ static TString PeerNameForHuman(ui32 nodeNum, const TString& longName, ui16 port) {
+ TStringBuf token;
+ TStringBuf(longName).NextTok('.', token);
+ return ToString<ui32>(nodeNum) + ":" + (token.size() > 0 ? TString(token) : longName) + ":" + ToString<ui16>(port);
+ }
+
+ TInterconnectProxyTCP::TInterconnectProxyTCP(const ui32 node, TInterconnectProxyCommon::TPtr common,
+ IActor **dynamicPtr)
+ : TActor(&TThis::StateInit)
+ , PeerNodeId(node)
+ , DynamicPtr(dynamicPtr)
+ , Common(std::move(common))
+ , SecureContext(new NInterconnect::TSecureSocketContext(Common->Settings.Certificate, Common->Settings.PrivateKey,
+ Common->Settings.CaFilePath, Common->Settings.CipherList))
+ {
+ Y_VERIFY(Common);
+ Y_VERIFY(Common->NameserviceId);
+ if (DynamicPtr) {
+ Y_VERIFY(!*DynamicPtr);
+ *DynamicPtr = this;
+ }
+ }
+
+ void TInterconnectProxyTCP::Bootstrap() {
+ SetPrefix(Sprintf("Proxy %s [node %" PRIu32 "]", SelfId().ToString().data(), PeerNodeId));
+
+ SwitchToInitialState();
+ PassAwayTimestamp = TActivationContext::Now() + TDuration::Seconds(15);
+
+ LOG_INFO_IC("ICP01", "ready to work");
+ }
+
+ void TInterconnectProxyTCP::Registered(TActorSystem* sys, const TActorId& owner) {
+ if (!DynamicPtr) {
+ // perform usual bootstrap for static nodes
+ sys->Send(new IEventHandle(TEvents::TSystem::Bootstrap, 0, SelfId(), owner, nullptr, 0));
+ }
+ if (const auto& mon = Common->RegisterMonPage) {
+ TString path = Sprintf("peer%04" PRIu32, PeerNodeId);
+ TString title = Sprintf("Peer #%04" PRIu32, PeerNodeId);
+ mon(path, title, sys, SelfId());
+ }
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // PendingActivation
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ void TInterconnectProxyTCP::RequestNodeInfo(STATEFN_SIG) {
+ ICPROXY_PROFILED;
+
+ Y_VERIFY(!IncomingHandshakeActor && !OutgoingHandshakeActor && !PendingIncomingHandshakeEvents && !PendingSessionEvents);
+ EnqueueSessionEvent(ev);
+ StartConfiguring();
+ }
+
+ void TInterconnectProxyTCP::RequestNodeInfoForIncomingHandshake(STATEFN_SIG) {
+ ICPROXY_PROFILED;
+
+ if (!Terminated) {
+ Y_VERIFY(!IncomingHandshakeActor && !OutgoingHandshakeActor && !PendingIncomingHandshakeEvents && !PendingSessionEvents);
+ EnqueueIncomingHandshakeEvent(ev);
+ StartConfiguring();
+ }
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // PendingNodeInfo
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ void TInterconnectProxyTCP::StartConfiguring() {
+ ICPROXY_PROFILED;
+
+ Y_VERIFY(!IncomingHandshakeActor && !OutgoingHandshakeActor);
+
+ // issue node info request
+ Send(Common->NameserviceId, new TEvInterconnect::TEvGetNode(PeerNodeId));
+
+        // arm the configure timer; keep a pointer to the event so we can tell the correct
+        // timeout apart from any other wakeup events still in flight
+ SwitchToState(__LINE__, "PendingNodeInfo", &TThis::PendingNodeInfo, GetNodeRequestTimeout,
+ ConfigureTimeoutCookie = new TEvents::TEvWakeup);
+ }
+
+ void TInterconnectProxyTCP::Configure(TEvInterconnect::TEvNodeInfo::TPtr& ev) {
+ ICPROXY_PROFILED;
+
+ Y_VERIFY(!IncomingHandshakeActor && !OutgoingHandshakeActor && !Session);
+
+ if (!ev->Get()->Node) {
+ TransitToErrorState("cannot get node info");
+ } else {
+ auto& info = *ev->Get()->Node;
+ TString name = PeerNameForHuman(PeerNodeId, info.Host, info.Port);
+ TechnicalPeerHostName = info.Host;
+ if (!Metrics) {
+ Metrics = Common->Metrics ? CreateInterconnectMetrics(Common) : CreateInterconnectCounters(Common);
+ }
+ Metrics->SetPeerInfo(name, info.Location.GetDataCenterId());
+
+ LOG_DEBUG_IC("ICP02", "configured for host %s", name.data());
+
+ ProcessConfigured();
+ }
+ }
+
+ void TInterconnectProxyTCP::ConfigureTimeout(TEvents::TEvWakeup::TPtr& ev) {
+ ICPROXY_PROFILED;
+
+ if (ev->Get() == ConfigureTimeoutCookie) {
+ TransitToErrorState("timed out while waiting for node info");
+ }
+ }
+
+ void TInterconnectProxyTCP::ProcessConfigured() {
+ ICPROXY_PROFILED;
+
+ // if the request was initiated by some activity involving Interconnect, then we are expected to start handshake
+ if (PendingSessionEvents) {
+ StartInitialHandshake();
+ }
+
+ // process incoming handshake requests; all failures were ejected from the queue along with the matching initiation requests
+ for (THolder<IEventHandle>& ev : PendingIncomingHandshakeEvents) {
+ TAutoPtr<IEventHandle> x(ev.Release());
+ IncomingHandshake(x);
+ }
+ PendingIncomingHandshakeEvents.clear();
+
+        // possible situation: an incoming handshake arrived but was rejected rather than satisfied;
+        // in that case we return to the initial state, as there is nothing left to do
+ if (!IncomingHandshakeActor && !OutgoingHandshakeActor) {
+ SwitchToInitialState();
+ }
+ }
+
+ void TInterconnectProxyTCP::StartInitialHandshake() {
+ ICPROXY_PROFILED;
+
+ // since we are starting initial handshake for some reason, we'll drop any existing handshakes, if any
+ DropHandshakes();
+
+ // create and register handshake actor
+ OutgoingHandshakeActor = Register(CreateOutgoingHandshakeActor(Common, GenerateSessionVirtualId(),
+ TActorId(), PeerNodeId, 0, TechnicalPeerHostName, TSessionParams()), TMailboxType::ReadAsFilled);
+ OutgoingHandshakeActorCreated = TActivationContext::Now();
+
+ // prepare for new handshake
+ PrepareNewSessionHandshake();
+ }
+
+ void TInterconnectProxyTCP::StartResumeHandshake(ui64 inputCounter) {
+ ICPROXY_PROFILED;
+
+ // drop outgoing handshake if we have one; keep incoming handshakes as they may be useful
+ DropOutgoingHandshake();
+
+ // ensure that we have session
+ Y_VERIFY(Session);
+
+ // ensure that we have both virtual ids
+ Y_VERIFY(SessionVirtualId);
+ Y_VERIFY(RemoteSessionVirtualId);
+
+ // create and register handshake actor
+ OutgoingHandshakeActor = Register(CreateOutgoingHandshakeActor(Common, SessionVirtualId,
+ RemoteSessionVirtualId, PeerNodeId, inputCounter, TechnicalPeerHostName, Session->Params),
+ TMailboxType::ReadAsFilled);
+ OutgoingHandshakeActorCreated = TActivationContext::Now();
+ }
+
+ void TInterconnectProxyTCP::IssueIncomingHandshakeReply(const TActorId& handshakeId, ui64 peerLocalId,
+ THolder<IEventBase> event) {
+ ICPROXY_PROFILED;
+
+ Y_VERIFY(!IncomingHandshakeActor);
+ IncomingHandshakeActor = handshakeId;
+ IncomingHandshakeActorFilledIn = TActivationContext::Now();
+ Y_VERIFY(!LastSerialFromIncomingHandshake || *LastSerialFromIncomingHandshake <= peerLocalId);
+ LastSerialFromIncomingHandshake = peerLocalId;
+
+ if (OutgoingHandshakeActor && SelfId().NodeId() < PeerNodeId) {
+            // Both outgoing and incoming handshakes are in progress. To prevent a race during a
+            // simultaneous handshake, the incoming handshake reply is held until the outgoing
+            // handshake completes or fails.
+ LOG_DEBUG_IC("ICP06", "reply for incoming handshake (actor %s) is held", IncomingHandshakeActor.ToString().data());
+ HeldHandshakeReply = std::move(event);
+
+ // Check that we are in one of acceptable states that would properly handle handshake statuses.
+ const auto state = CurrentStateFunc();
+ Y_VERIFY(state == &TThis::PendingConnection || state == &TThis::StateWork, "invalid handshake request in state# %s", State);
+ } else {
+ LOG_DEBUG_IC("ICP07", "issued incoming handshake reply");
+
+ // No race, so we can send reply immediately.
+ Y_VERIFY(!HeldHandshakeReply);
+ Send(IncomingHandshakeActor, event.Release());
+
+ // Start waiting for handshake reply, if not yet started; also, if session is already created, then we don't
+ // switch from working state.
+ if (!Session) {
+ LOG_INFO_IC("ICP08", "No active sessions, becoming PendingConnection");
+ SwitchToState(__LINE__, "PendingConnection", &TThis::PendingConnection);
+ } else {
+ Y_VERIFY(CurrentStateFunc() == &TThis::StateWork);
+ }
+ }
+ }
+
+ void TInterconnectProxyTCP::IncomingHandshake(TEvHandshakeAsk::TPtr& ev) {
+ ICPROXY_PROFILED;
+
+ TEvHandshakeAsk *msg = ev->Get();
+
+ // TEvHandshakeAsk is only applicable for continuation requests
+ LOG_DEBUG_IC("ICP09", "(actor %s) from: %s for: %s", ev->Sender.ToString().data(),
+ ev->Get()->Self.ToString().data(), ev->Get()->Peer.ToString().data());
+
+ if (!Session) {
+ // if there is no open session, report error -- continuation request works only with open sessions
+ LOG_NOTICE_IC("ICP12", "(actor %s) peer tries to resume nonexistent session Self# %s Peer# %s",
+ ev->Sender.ToString().data(), msg->Self.ToString().data(), msg->Peer.ToString().data());
+ } else if (SessionVirtualId != ev->Get()->Peer || RemoteSessionVirtualId != ev->Get()->Self) {
+ // check session virtual ids for continuation
+ LOG_NOTICE_IC("ICP13", "(actor %s) virtual id mismatch with existing session (Peer: %s Self: %s"
+ " SessionVirtualId: %s RemoteSessionVirtualId: %s)", ev->Sender.ToString().data(),
+ ev->Get()->Peer.ToString().data(), ev->Get()->Self.ToString().data(), SessionVirtualId.ToString().data(),
+ RemoteSessionVirtualId.ToString().data());
+ } else {
+ // if we already have incoming handshake, then terminate existing one
+ DropIncomingHandshake();
+
+ // issue reply to the sender, possibly holding it while outgoing handshake is at race
+ THolder<IEventBase> reply = IActor::InvokeOtherActor(*Session, &TInterconnectSessionTCP::ProcessHandshakeRequest, ev);
+ return IssueIncomingHandshakeReply(ev->Sender, RemoteSessionVirtualId.LocalId(), std::move(reply));
+ }
+
+ // error case -- report error to the handshake actor
+ Send(ev->Sender, new TEvHandshakeNak);
+ }
+
+ void TInterconnectProxyTCP::IncomingHandshake(TEvHandshakeRequest::TPtr& ev) {
+ ICPROXY_PROFILED;
+
+ LOG_DEBUG_IC("ICP17", "incoming handshake (actor %s)", ev->Sender.ToString().data());
+
+ const auto& record = ev->Get()->Record;
+ ui64 remotePID = record.GetProgramPID();
+ ui64 remoteStartTime = record.GetProgramStartTime();
+ ui64 remoteSerial = record.GetSerial();
+
+ if (RemoteProgramInfo && remotePID == RemoteProgramInfo->PID && remoteStartTime == RemoteProgramInfo->StartTime) {
+ if (remoteSerial < RemoteProgramInfo->Serial) {
+ LOG_INFO_IC("ICP18", "handshake (actor %s) is too old", ev->Sender.ToString().data());
+ Send(ev->Sender, new TEvents::TEvPoisonPill);
+ return;
+ } else {
+ RemoteProgramInfo->Serial = remoteSerial;
+ }
+ } else {
+ const auto ptr = new TProgramInfo;
+ ptr->PID = remotePID;
+ ptr->StartTime = remoteStartTime;
+ ptr->Serial = remoteSerial;
+ RemoteProgramInfo.Reset(ptr);
+ }
+
+ /* Let's check peer technical hostname */
+ if (record.HasSenderHostName() && TechnicalPeerHostName != record.GetSenderHostName()) {
+ Send(ev->Sender, new TEvHandshakeReplyError("host name mismatch"));
+ return;
+ }
+
+ // check sender actor id and check if it is not very old
+ if (LastSerialFromIncomingHandshake) {
+ const ui64 serial = record.GetSerial();
+ if (serial < *LastSerialFromIncomingHandshake) {
+ LOG_NOTICE_IC("ICP15", "Handshake# %s has duplicate serial# %" PRIu64
+ " LastSerialFromIncomingHandshake# %" PRIu64, ev->Sender.ToString().data(),
+ serial, *LastSerialFromIncomingHandshake);
+ Send(ev->Sender, new TEvHandshakeReplyError("duplicate serial"));
+ return;
+ } else if (serial == *LastSerialFromIncomingHandshake) {
+ LOG_NOTICE_IC("ICP15", "Handshake# %s is obsolete, serial# %" PRIu64
+ " LastSerialFromIncomingHandshake# %" PRIu64, ev->Sender.ToString().data(),
+ serial, *LastSerialFromIncomingHandshake);
+ Send(ev->Sender, new TEvents::TEvPoisonPill);
+ return;
+ }
+ }
+
+ // drop incoming handshake as this is definitely more recent
+ DropIncomingHandshake();
+
+ // prepare for new session
+ PrepareNewSessionHandshake();
+
+ auto event = MakeHolder<TEvHandshakeReplyOK>();
+ auto* pb = event->Record.MutableSuccess();
+ const TActorId virtualId = GenerateSessionVirtualId();
+ pb->SetProtocol(INTERCONNECT_PROTOCOL_VERSION);
+ pb->SetSenderActorId(virtualId.ToString());
+ pb->SetProgramPID(GetPID());
+ pb->SetProgramStartTime(Common->StartTime);
+ pb->SetSerial(virtualId.LocalId());
+
+ IssueIncomingHandshakeReply(ev->Sender, 0, std::move(event));
+ }
+
+ void TInterconnectProxyTCP::HandleHandshakeStatus(TEvHandshakeDone::TPtr& ev) {
+ ICPROXY_PROFILED;
+
+ TEvHandshakeDone *msg = ev->Get();
+
+ // Terminate handshake actor working in opposite direction, if set up.
+ if (ev->Sender == IncomingHandshakeActor) {
+ LOG_INFO_IC("ICP19", "incoming handshake succeeded");
+ DropIncomingHandshake(false);
+ DropOutgoingHandshake();
+ } else if (ev->Sender == OutgoingHandshakeActor) {
+ LOG_INFO_IC("ICP20", "outgoing handshake succeeded");
+ DropIncomingHandshake();
+ DropOutgoingHandshake(false);
+ } else {
+ /* It seems to be an old handshake. */
+ return;
+ }
+
+ Y_VERIFY(!IncomingHandshakeActor && !OutgoingHandshakeActor);
+ SwitchToState(__LINE__, "StateWork", &TThis::StateWork);
+
+ if (Session) {
+            // this is a continuation request; check that the virtual ids match
+ Y_VERIFY(SessionVirtualId == msg->Self && RemoteSessionVirtualId == msg->Peer);
+ } else {
+            // this is an initial request; check that the virtual ids are not yet filled in
+ Y_VERIFY(!SessionVirtualId && !RemoteSessionVirtualId);
+ }
+
+ auto error = [&](const char* description) {
+ TransitToErrorState(description);
+ };
+
+ // If session is not created, then create new one.
+ if (!Session) {
+ RemoteProgramInfo = std::move(msg->ProgramInfo);
+ if (!RemoteProgramInfo) {
+                // we have received a resume handshake, but the session was closed concurrently while handshaking
+ return error("Session continuation race");
+ }
+
+ // Create new session actor.
+ SessionID = RegisterWithSameMailbox(Session = new TInterconnectSessionTCP(this, msg->Params));
+ IActor::InvokeOtherActor(*Session, &TInterconnectSessionTCP::Init);
+ SessionVirtualId = msg->Self;
+ RemoteSessionVirtualId = msg->Peer;
+ LOG_INFO_IC("ICP22", "created new session: %s", SessionID.ToString().data());
+ }
+
+ // ensure that we have session local/peer virtual ids
+ Y_VERIFY(Session && SessionVirtualId && RemoteSessionVirtualId);
+
+ // Set up new connection for the session.
+ IActor::InvokeOtherActor(*Session, &TInterconnectSessionTCP::SetNewConnection, ev);
+
+ // Reset retry timer
+ HoldByErrorWakeupDuration = TDuration::Zero();
+
+ /* Forward all held events */
+ ProcessPendingSessionEvents();
+ }
+
+ void TInterconnectProxyTCP::HandleHandshakeStatus(TEvHandshakeFail::TPtr& ev) {
+ ICPROXY_PROFILED;
+
+        // update the error state log; this failure is inconclusive unless it is the last pending handshake
+ const bool inconclusive = (ev->Sender != IncomingHandshakeActor && ev->Sender != OutgoingHandshakeActor) ||
+ (IncomingHandshakeActor && OutgoingHandshakeActor);
+ LogHandshakeFail(ev, inconclusive);
+
+ if (ev->Sender == IncomingHandshakeActor) {
+ LOG_NOTICE_IC("ICP24", "incoming handshake failed, temporary: %" PRIu32 " explanation: %s outgoing: %s",
+ ui32(ev->Get()->Temporary), ev->Get()->Explanation.data(), OutgoingHandshakeActor.ToString().data());
+ DropIncomingHandshake(false);
+ } else if (ev->Sender == OutgoingHandshakeActor) {
+ LOG_NOTICE_IC("ICP25", "outgoing handshake failed, temporary: %" PRIu32 " explanation: %s incoming: %s held: %s",
+ ui32(ev->Get()->Temporary), ev->Get()->Explanation.data(), IncomingHandshakeActor.ToString().data(),
+ HeldHandshakeReply ? "yes" : "no");
+ DropOutgoingHandshake(false);
+
+ if (IEventBase* reply = HeldHandshakeReply.Release()) {
+ Y_VERIFY(IncomingHandshakeActor);
+ LOG_DEBUG_IC("ICP26", "sent held handshake reply to %s", IncomingHandshakeActor.ToString().data());
+ Send(IncomingHandshakeActor, reply);
+ }
+
+ // if we have no current session, then we have to drop all pending events as the outgoing handshake has failed
+ ProcessPendingSessionEvents();
+ } else {
+            /* It seems to be an old failure, just ignore it */
+ LOG_NOTICE_IC("ICP27", "obsolete handshake fail ignored");
+ return;
+ }
+
+ if (Metrics) {
+ Metrics->IncHandshakeFails();
+ }
+
+ if (IncomingHandshakeActor || OutgoingHandshakeActor) {
+ // one of handshakes is still going on
+ LOG_DEBUG_IC("ICP28", "other handshake is still going on");
+ return;
+ }
+
+ switch (ev->Get()->Temporary) {
+ case TEvHandshakeFail::HANDSHAKE_FAIL_TRANSIENT:
+ if (!Session) {
+ if (PendingSessionEvents) {
+ // try to start outgoing handshake as we have some events enqueued
+ StartInitialHandshake();
+ } else {
+ // return back to initial state as we have no session and no pending handshakes
+ SwitchToInitialState();
+ }
+ } else if (Session->Socket) {
+ // try to reestablish connection -- meaning restart handshake from the last known position
+ IActor::InvokeOtherActor(*Session, &TInterconnectSessionTCP::ReestablishConnectionWithHandshake,
+ TDisconnectReason::HandshakeFailTransient());
+ } else {
+ // we have no active connection in that session, so just restart handshake from last known position
+ IActor::InvokeOtherActor(*Session, &TInterconnectSessionTCP::StartHandshake);
+ }
+ break;
+
+ case TEvHandshakeFail::HANDSHAKE_FAIL_SESSION_MISMATCH:
+ StartInitialHandshake();
+ break;
+
+ case TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT:
+ TString timeExplanation = " LastSessionDieTime# " + LastSessionDieTime.ToString();
+ if (Session) {
+ InvokeOtherActor(*Session, &TInterconnectSessionTCP::Terminate,
+ TDisconnectReason::HandshakeFailPermanent());
+ }
+ TransitToErrorState(ev->Get()->Explanation + timeExplanation, false);
+ break;
+ }
+ }
+
+ void TInterconnectProxyTCP::LogHandshakeFail(TEvHandshakeFail::TPtr& ev, bool inconclusive) {
+ ICPROXY_PROFILED;
+
+ TString kind = "unknown";
+ switch (ev->Get()->Temporary) {
+ case TEvHandshakeFail::HANDSHAKE_FAIL_TRANSIENT:
+ kind = Session ? "transient w/session" : "transient w/o session";
+ break;
+
+ case TEvHandshakeFail::HANDSHAKE_FAIL_SESSION_MISMATCH:
+ kind = "session_mismatch";
+ break;
+
+ case TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT:
+ kind = "permanent";
+ break;
+ }
+ if (inconclusive) {
+ kind += " inconclusive";
+ }
+ UpdateErrorStateLog(TActivationContext::Now(), kind, ev->Get()->Explanation);
+ }
+
+ void TInterconnectProxyTCP::ProcessPendingSessionEvents() {
+ ICPROXY_PROFILED;
+
+ while (PendingSessionEvents) {
+ TPendingSessionEvent ev = std::move(PendingSessionEvents.front());
+ PendingSessionEventsSize -= ev.Size;
+ TAutoPtr<IEventHandle> event(ev.Event.Release());
+ PendingSessionEvents.pop_front();
+
+ if (Session) {
+ ForwardSessionEventToSession(event);
+ } else {
+ DropSessionEvent(event);
+ }
+ }
+ }
+
+ void TInterconnectProxyTCP::DropSessionEvent(STATEFN_SIG) {
+ ICPROXY_PROFILED;
+
+ ValidateEvent(ev, "DropSessionEvent");
+ switch (ev->GetTypeRewrite()) {
+ case TEvInterconnect::EvForward:
+ if (ev->Flags & IEventHandle::FlagSubscribeOnSession) {
+ Send(ev->Sender, new TEvInterconnect::TEvNodeDisconnected(PeerNodeId), 0, ev->Cookie);
+ }
+ TActivationContext::Send(ev->ForwardOnNondelivery(TEvents::TEvUndelivered::Disconnected));
+ break;
+
+ case TEvInterconnect::TEvConnectNode::EventType:
+ case TEvents::TEvSubscribe::EventType:
+ Send(ev->Sender, new TEvInterconnect::TEvNodeDisconnected(PeerNodeId), 0, ev->Cookie);
+ break;
+
+ case TEvents::TEvUnsubscribe::EventType:
+ /* Do nothing */
+ break;
+
+ default:
+ Y_FAIL("Unexpected type of event in held event queue");
+ }
+ }
+
+ void TInterconnectProxyTCP::UnregisterSession(TInterconnectSessionTCP* session) {
+ ICPROXY_PROFILED;
+
+ Y_VERIFY(Session && Session == session && SessionID);
+
+ LOG_INFO_IC("ICP30", "unregister session Session# %s VirtualId# %s", SessionID.ToString().data(),
+ SessionVirtualId.ToString().data());
+
+ Session = nullptr;
+ SessionID = TActorId();
+
+ // drop all pending events as we are closed
+ ProcessPendingSessionEvents();
+
+ // reset virtual ids as this session is terminated
+ SessionVirtualId = TActorId();
+ RemoteSessionVirtualId = TActorId();
+
+ if (Metrics) {
+ Metrics->IncSessionDeaths();
+ }
+ LastSessionDieTime = TActivationContext::Now();
+
+ if (IncomingHandshakeActor || OutgoingHandshakeActor) {
+ PrepareNewSessionHandshake();
+ } else {
+ SwitchToInitialState();
+ }
+ }
+
+ void TInterconnectProxyTCP::EnqueueSessionEvent(STATEFN_SIG) {
+ ICPROXY_PROFILED;
+
+ ValidateEvent(ev, "EnqueueSessionEvent");
+ const ui32 size = ev->GetSize();
+ PendingSessionEventsSize += size;
+ PendingSessionEvents.emplace_back(TActivationContext::Now() + Common->Settings.MessagePendingTimeout, size, ev);
+ ScheduleCleanupEventQueue();
+ CleanupEventQueue();
+ }
+
+ void TInterconnectProxyTCP::EnqueueIncomingHandshakeEvent(STATEFN_SIG) {
+ ICPROXY_PROFILED;
+
+ // enqueue handshake request
+ PendingIncomingHandshakeEvents.emplace_back(ev);
+ }
+
+ void TInterconnectProxyTCP::EnqueueIncomingHandshakeEvent(TEvHandshakeDone::TPtr& /*ev*/) {
+ ICPROXY_PROFILED;
+
+        // TEvHandshakeDone can't get into the queue, because we have to process the handshake request first; this may
+        // be a race with previous handshakes, so simply ignore it
+ }
+
+ void TInterconnectProxyTCP::EnqueueIncomingHandshakeEvent(TEvHandshakeFail::TPtr& ev) {
+ ICPROXY_PROFILED;
+
+ for (auto it = PendingIncomingHandshakeEvents.begin(); it != PendingIncomingHandshakeEvents.end(); ++it) {
+ THolder<IEventHandle>& pendingEvent = *it;
+ if (pendingEvent->Sender == ev->Sender) {
+                // we have found a cancellation request for the pending handshake request, so simply remove it from the
+                // deque, as we are not interested in the failure reason; most likely it happened because of a handshake timeout
+ if (pendingEvent->GetTypeRewrite() == TEvHandshakeFail::EventType) {
+ TEvHandshakeFail::TPtr tmp(static_cast<TEventHandle<TEvHandshakeFail>*>(pendingEvent.Release()));
+ LogHandshakeFail(tmp, true);
+ }
+ PendingIncomingHandshakeEvents.erase(it);
+ break;
+ }
+ }
+ }
+
+ void TInterconnectProxyTCP::ForwardSessionEventToSession(STATEFN_SIG) {
+ ICPROXY_PROFILED;
+
+ Y_VERIFY(Session && SessionID);
+ ValidateEvent(ev, "ForwardSessionEventToSession");
+ InvokeOtherActor(*Session, &TInterconnectSessionTCP::Receive, ev, TActivationContext::ActorContextFor(SessionID));
+ }
+
+ void TInterconnectProxyTCP::GenerateHttpInfo(NMon::TEvHttpInfo::TPtr& ev) {
+ ICPROXY_PROFILED;
+
+ LOG_INFO_IC("ICP31", "proxy http called");
+
+ TStringStream str;
+
+ HTML(str) {
+ DIV_CLASS("panel panel-info") {
+ DIV_CLASS("panel-heading") {
+ str << "Proxy";
+ }
+ DIV_CLASS("panel-body") {
+ TABLE_CLASS("table") {
+ TABLEHEAD() {
+ TABLER() {
+ TABLEH() {
+ str << "Sensor";
+ }
+ TABLEH() {
+ str << "Value";
+ }
+ }
+ }
+#define MON_VAR(NAME) \
+ TABLER() { \
+ TABLED() { \
+ str << #NAME; \
+ } \
+ TABLED() { \
+ str << NAME; \
+ } \
+ }
+
+ TABLEBODY() {
+ MON_VAR(TActivationContext::Now())
+ MON_VAR(SessionID)
+ MON_VAR(LastSessionDieTime)
+ MON_VAR(IncomingHandshakeActor)
+ MON_VAR(IncomingHandshakeActorFilledIn)
+ MON_VAR(IncomingHandshakeActorReset)
+ MON_VAR(OutgoingHandshakeActor)
+ MON_VAR(OutgoingHandshakeActorCreated)
+ MON_VAR(OutgoingHandshakeActorReset)
+ MON_VAR(State)
+ MON_VAR(StateSwitchTime)
+ }
+ }
+ }
+ }
+
+ DIV_CLASS("panel panel-info") {
+ DIV_CLASS("panel-heading") {
+ str << "Error Log";
+ }
+ DIV_CLASS("panel-body") {
+ TABLE_CLASS("table") {
+ TABLEHEAD() {
+ TABLER() {
+ TABLEH() {
+ str << "Timestamp";
+ }
+ TABLEH() {
+ str << "Elapsed";
+ }
+ TABLEH() {
+ str << "Kind";
+ }
+ TABLEH() {
+ str << "Explanation";
+ }
+ }
+ }
+ TABLEBODY() {
+ const TInstant now = TActivationContext::Now();
+ const TInstant barrier = now - TDuration::Minutes(1);
+ for (auto it = ErrorStateLog.rbegin(); it != ErrorStateLog.rend(); ++it) {
+ auto wrapper = [&](const auto& lambda) {
+ if (std::get<0>(*it) > barrier) {
+ str << "<strong>";
+ lambda();
+ str << "</strong>";
+ } else {
+ lambda();
+ }
+ };
+ TABLER() {
+ TABLED() {
+ wrapper([&] {
+ str << std::get<0>(*it);
+ });
+ }
+ TABLED() {
+ wrapper([&] {
+ str << now - std::get<0>(*it);
+ });
+ }
+ TABLED() {
+ wrapper([&] {
+ str << std::get<1>(*it);
+ });
+ }
+ TABLED() {
+ wrapper([&] {
+ str << std::get<2>(*it);
+ });
+
+ ui32 rep = std::get<3>(*it);
+ if (rep != 1) {
+ str << " <strong>x" << rep << "</strong>";
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (Session != nullptr) {
+ Session->GenerateHttpInfo(str);
+ }
+
+ Send(ev->Sender, new NMon::TEvHttpInfoRes(str.Str()));
+ }
+
+ void TInterconnectProxyTCP::TransitToErrorState(TString explanation, bool updateErrorLog) {
+ ICPROXY_PROFILED;
+
+ LOG_NOTICE_IC("ICP32", "transit to hold-by-error state Explanation# %s", explanation.data());
+ LOG_INFO(*TlsActivationContext, NActorsServices::INTERCONNECT_STATUS, "[%u] error state: %s", PeerNodeId, explanation.data());
+
+ if (updateErrorLog) {
+ UpdateErrorStateLog(TActivationContext::Now(), "permanent conclusive", explanation);
+ }
+
+ Y_VERIFY(Session == nullptr);
+ Y_VERIFY(!SessionID);
+
+        // recalculate the wakeup timeout -- if this is the first failure, we sleep for the default timeout; otherwise
+        // we sleep N times longer than on the previous try, but no longer than the configured maximum
+ HoldByErrorWakeupDuration = HoldByErrorWakeupDuration != TDuration::Zero()
+ ? Min(HoldByErrorWakeupDuration * SleepRetryMultiplier, MaxErrorSleep)
+ : FirstErrorSleep;
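+        // For illustration (FirstErrorSleep, SleepRetryMultiplier and MaxErrorSleep are configuration constants
+        // not shown here): with 100ms, x4 and 10s respectively, the successive retry delays would be
+        // 100ms, 400ms, 1.6s, 6.4s, 10s, 10s, ...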
+
+ // transit to required state and arm wakeup timer
+ if (Terminated) {
+ // switch to this state permanently
+ SwitchToState(__LINE__, "HoldByError", &TThis::HoldByError);
+ HoldByErrorWakeupCookie = nullptr;
+ } else {
+ SwitchToState(__LINE__, "HoldByError", &TThis::HoldByError, HoldByErrorWakeupDuration,
+ HoldByErrorWakeupCookie = new TEvents::TEvWakeup);
+ }
+
+ /* Process all pending events. */
+ ProcessPendingSessionEvents();
+
+ /* Terminate handshakes */
+ DropHandshakes();
+
+ /* Terminate pending incoming handshake requests. */
+ for (auto& ev : PendingIncomingHandshakeEvents) {
+ Send(ev->Sender, new TEvents::TEvPoisonPill);
+ if (ev->GetTypeRewrite() == TEvHandshakeFail::EventType) {
+ TEvHandshakeFail::TPtr tmp(static_cast<TEventHandle<TEvHandshakeFail>*>(ev.Release()));
+ LogHandshakeFail(tmp, true);
+ }
+ }
+ PendingIncomingHandshakeEvents.clear();
+ }
+
+ void TInterconnectProxyTCP::WakeupFromErrorState(TEvents::TEvWakeup::TPtr& ev) {
+ ICPROXY_PROFILED;
+
+ LOG_INFO_IC("ICP33", "wake up from error state");
+
+ if (ev->Get() == HoldByErrorWakeupCookie) {
+ SwitchToInitialState();
+ }
+ }
+
+ void TInterconnectProxyTCP::Disconnect() {
+ ICPROXY_PROFILED;
+
+ // terminate handshakes (if any)
+ DropHandshakes();
+
+ if (Session) {
+ IActor::InvokeOtherActor(*Session, &TInterconnectSessionTCP::Terminate, TDisconnectReason::UserRequest());
+ } else {
+ TransitToErrorState("forced disconnect");
+ }
+ }
+
+ void TInterconnectProxyTCP::ScheduleCleanupEventQueue() {
+ ICPROXY_PROFILED;
+
+ if (!CleanupEventQueueScheduled && PendingSessionEvents) {
+ // apply batching at 50 ms granularity
+ Schedule(Max(TDuration::MilliSeconds(50), PendingSessionEvents.front().Deadline - TActivationContext::Now()), new TEvCleanupEventQueue);
+ CleanupEventQueueScheduled = true;
+ }
+ }
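+
+    // Note on the Max(50ms, ...) above: expirations are coalesced, so events with deadlines at, say, +10ms,
+    // +30ms and +45ms from now are all handled by a single TEvCleanupEventQueue wakeup (illustrative timings).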
+
+ void TInterconnectProxyTCP::HandleCleanupEventQueue() {
+ ICPROXY_PROFILED;
+
+ Y_VERIFY(CleanupEventQueueScheduled);
+ CleanupEventQueueScheduled = false;
+ CleanupEventQueue();
+ ScheduleCleanupEventQueue();
+ }
+
+ void TInterconnectProxyTCP::CleanupEventQueue() {
+ ICPROXY_PROFILED;
+
+ const TInstant now = TActivationContext::Now();
+ while (PendingSessionEvents) {
+ TPendingSessionEvent& ev = PendingSessionEvents.front();
+ if (now >= ev.Deadline || PendingSessionEventsSize > Common->Settings.MessagePendingSize) {
+ TAutoPtr<IEventHandle> event(ev.Event.Release());
+ PendingSessionEventsSize -= ev.Size;
+ DropSessionEvent(event);
+ PendingSessionEvents.pop_front();
+ } else {
+ break;
+ }
+ }
+ }
+
+ void TInterconnectProxyTCP::HandleClosePeerSocket() {
+ ICPROXY_PROFILED;
+
+ if (Session && Session->Socket) {
+ LOG_INFO_IC("ICP34", "closed connection by debug command");
+ Session->Socket->Shutdown(SHUT_RDWR);
+ }
+ }
+
+ void TInterconnectProxyTCP::HandleCloseInputSession() {
+ ICPROXY_PROFILED;
+
+ if (Session) {
+ IActor::InvokeOtherActor(*Session, &TInterconnectSessionTCP::CloseInputSession);
+ }
+ }
+
+ void TInterconnectProxyTCP::HandlePoisonSession() {
+ ICPROXY_PROFILED;
+
+ if (Session) {
+ IActor::InvokeOtherActor(*Session, &TInterconnectSessionTCP::Terminate, TDisconnectReason::Debug());
+ }
+ }
+
+ void TInterconnectProxyTCP::HandleSessionBufferSizeRequest(TEvSessionBufferSizeRequest::TPtr& ev) {
+ ICPROXY_PROFILED;
+
+ ui64 bufSize = 0;
+ if (Session) {
+ bufSize = Session->TotalOutputQueueSize;
+ }
+
+ Send(ev->Sender, new TEvSessionBufferSizeResponse(SessionID, bufSize));
+ }
+
+ void TInterconnectProxyTCP::Handle(TEvQueryStats::TPtr& ev) {
+ ICPROXY_PROFILED;
+
+ TProxyStats stats;
+ stats.Path = Sprintf("peer%04" PRIu32, PeerNodeId);
+ stats.State = State;
+ stats.PeerScopeId = Session ? Session->Params.PeerScopeId : TScopeId();
+ stats.LastSessionDieTime = LastSessionDieTime;
+ stats.TotalOutputQueueSize = Session ? Session->TotalOutputQueueSize : 0;
+ stats.Connected = Session ? (bool)Session->Socket : false;
+ stats.Host = TechnicalPeerHostName;
+ stats.Port = 0;
+ ui32 rep = 0;
+ std::tie(stats.LastErrorTimestamp, stats.LastErrorKind, stats.LastErrorExplanation, rep) = ErrorStateLog
+ ? ErrorStateLog.back()
+ : std::make_tuple(TInstant(), TString(), TString(), 1U);
+ if (rep != 1) {
+ stats.LastErrorExplanation += Sprintf(" x%" PRIu32, rep);
+ }
+ stats.Ping = Session ? Session->GetPingRTT() : TDuration::Zero();
+ stats.ClockSkew = Session ? Session->GetClockSkew() : 0;
+ if (Session) {
+ if (auto *x = dynamic_cast<NInterconnect::TSecureSocket*>(Session->Socket.Get())) {
+ stats.Encryption = Sprintf("%s/%u", x->GetCipherName().data(), x->GetCipherBits());
+ } else {
+ stats.Encryption = "none";
+ }
+ }
+
+ auto response = MakeHolder<TEvStats>();
+ response->PeerNodeId = PeerNodeId;
+ response->ProxyStats = std::move(stats);
+ Send(ev->Sender, response.Release());
+ }
+
+ void TInterconnectProxyTCP::HandleTerminate() {
+ ICPROXY_PROFILED;
+
+ if (Session) {
+ IActor::InvokeOtherActor(*Session, &TInterconnectSessionTCP::Terminate, TDisconnectReason());
+ }
+ Terminated = true;
+ TransitToErrorState("terminated");
+ }
+
+ void TInterconnectProxyTCP::PassAway() {
+ if (Session) {
+ IActor::InvokeOtherActor(*Session, &TInterconnectSessionTCP::Terminate, TDisconnectReason());
+ }
+ if (DynamicPtr) {
+ Y_VERIFY(*DynamicPtr == this);
+ *DynamicPtr = nullptr;
+ }
+ // TODO: unregister actor mon page
+ TActor::PassAway();
+ }
+}
diff --git a/library/cpp/actors/interconnect/interconnect_tcp_proxy.h b/library/cpp/actors/interconnect/interconnect_tcp_proxy.h
new file mode 100644
index 0000000000..023e5bd1ee
--- /dev/null
+++ b/library/cpp/actors/interconnect/interconnect_tcp_proxy.h
@@ -0,0 +1,537 @@
+#pragma once
+
+#include <library/cpp/actors/core/actor_bootstrapped.h>
+#include <library/cpp/actors/core/hfunc.h>
+#include <library/cpp/actors/core/event_pb.h>
+#include <library/cpp/actors/core/events.h>
+#include <library/cpp/monlib/dynamic_counters/counters.h>
+
+#include "interconnect_common.h"
+#include "interconnect_counters.h"
+#include "interconnect_tcp_session.h"
+#include "profiler.h"
+
+#define ICPROXY_PROFILED TFunction func(*this, __func__, __LINE__)
+
+namespace NActors {
+
+
+    /* WARNING: all proxy actors must stay alive for the entire lifetime of the actor system */
+ class TInterconnectProxyTCP
+ : public TActor<TInterconnectProxyTCP>
+ , public TInterconnectLoggingBase
+ , public TProfiled
+ {
+ enum {
+ EvCleanupEventQueue = EventSpaceBegin(TEvents::ES_PRIVATE),
+ EvQueryStats,
+ EvStats,
+ EvPassAwayIfNeeded,
+ };
+
+ struct TEvCleanupEventQueue : TEventLocal<TEvCleanupEventQueue, EvCleanupEventQueue> {};
+
+ public:
+ struct TEvQueryStats : TEventLocal<TEvQueryStats, EvQueryStats> {};
+
+ struct TProxyStats {
+ TString Path;
+ TString State;
+ TScopeId PeerScopeId;
+ TInstant LastSessionDieTime;
+ ui64 TotalOutputQueueSize;
+ bool Connected;
+ TString Host;
+ ui16 Port;
+ TInstant LastErrorTimestamp;
+ TString LastErrorKind;
+ TString LastErrorExplanation;
+ TDuration Ping;
+ i64 ClockSkew;
+ TString Encryption;
+ };
+
+ struct TEvStats : TEventLocal<TEvStats, EvStats> {
+ ui32 PeerNodeId;
+ TProxyStats ProxyStats;
+ };
+
+ static constexpr EActivityType ActorActivityType() {
+ return INTERCONNECT_PROXY_TCP;
+ }
+
+ TInterconnectProxyTCP(const ui32 node, TInterconnectProxyCommon::TPtr common, IActor **dynamicPtr = nullptr);
+
+ STFUNC(StateInit) {
+ Bootstrap();
+ if (ev->Type != TEvents::TSystem::Bootstrap) { // for dynamic nodes we do not receive Bootstrap event
+ Receive(ev, ctx);
+ }
+ }
+
+ void Bootstrap();
+ void Registered(TActorSystem* sys, const TActorId& owner) override;
+
+ private:
+ friend class TInterconnectSessionTCP;
+ friend class TInterconnectSessionTCPv0;
+ friend class THandshake;
+ friend class TInputSessionTCP;
+
+ void UnregisterSession(TInterconnectSessionTCP* session);
+
+#define SESSION_EVENTS(HANDLER) \
+ fFunc(TEvInterconnect::EvForward, HANDLER) \
+ fFunc(TEvInterconnect::TEvConnectNode::EventType, HANDLER) \
+ fFunc(TEvents::TEvSubscribe::EventType, HANDLER) \
+ fFunc(TEvents::TEvUnsubscribe::EventType, HANDLER)
+
+#define INCOMING_HANDSHAKE_EVENTS(HANDLER) \
+ fFunc(TEvHandshakeAsk::EventType, HANDLER) \
+ fFunc(TEvHandshakeRequest::EventType, HANDLER)
+
+#define HANDSHAKE_STATUS_EVENTS(HANDLER) \
+ hFunc(TEvHandshakeDone, HANDLER) \
+ hFunc(TEvHandshakeFail, HANDLER)
+
+#define PROXY_STFUNC(STATE, SESSION_HANDLER, INCOMING_HANDSHAKE_HANDLER, \
+ HANDSHAKE_STATUS_HANDLER, DISCONNECT_HANDLER, \
+ WAKEUP_HANDLER, NODE_INFO_HANDLER) \
+ STATEFN(STATE) { \
+ const ui32 type = ev->GetTypeRewrite(); \
+ const bool profiled = type != TEvInterconnect::EvForward \
+ && type != TEvInterconnect::EvConnectNode \
+ && type != TEvents::TSystem::Subscribe \
+ && type != TEvents::TSystem::Unsubscribe; \
+ if (profiled) { \
+ TProfiled::Start(); \
+ } \
+ { \
+ TProfiled::TFunction func(*this, __func__, __LINE__); \
+ switch (type) { \
+ SESSION_EVENTS(SESSION_HANDLER) \
+ INCOMING_HANDSHAKE_EVENTS(INCOMING_HANDSHAKE_HANDLER) \
+ HANDSHAKE_STATUS_EVENTS(HANDSHAKE_STATUS_HANDLER) \
+ cFunc(TEvInterconnect::EvDisconnect, DISCONNECT_HANDLER) \
+ hFunc(TEvents::TEvWakeup, WAKEUP_HANDLER) \
+ hFunc(TEvGetSecureSocket, Handle) \
+ hFunc(NMon::TEvHttpInfo, GenerateHttpInfo) \
+ cFunc(EvCleanupEventQueue, HandleCleanupEventQueue) \
+ hFunc(TEvInterconnect::TEvNodeInfo, NODE_INFO_HANDLER) \
+ cFunc(TEvInterconnect::EvClosePeerSocket, HandleClosePeerSocket) \
+ cFunc(TEvInterconnect::EvCloseInputSession, HandleCloseInputSession) \
+ cFunc(TEvInterconnect::EvPoisonSession, HandlePoisonSession) \
+ hFunc(TEvSessionBufferSizeRequest, HandleSessionBufferSizeRequest) \
+ hFunc(TEvQueryStats, Handle) \
+ cFunc(TEvInterconnect::EvTerminate, HandleTerminate) \
+ cFunc(EvPassAwayIfNeeded, HandlePassAwayIfNeeded) \
+ default: \
+ Y_FAIL("unexpected event Type# 0x%08" PRIx32, type); \
+ } \
+ } \
+ if (profiled) { \
+ if (TProfiled::Duration() >= TDuration::MilliSeconds(16)) { \
+ const TString report = TProfiled::Format(); \
+ LOG_ERROR_IC("ICP35", "event processing took too much time %s", report.data()); \
+ } \
+ TProfiled::Finish(); \
+ } \
+ }
+
+ template <typename T>
+ void Ignore(T& /*ev*/) {
+ ICPROXY_PROFILED;
+ }
+
+ void Ignore() {
+ ICPROXY_PROFILED;
+ }
+
+ void Ignore(TEvHandshakeDone::TPtr& ev) {
+ ICPROXY_PROFILED;
+
+ Y_VERIFY(ev->Sender != IncomingHandshakeActor);
+ Y_VERIFY(ev->Sender != OutgoingHandshakeActor);
+ }
+
+ void Ignore(TEvHandshakeFail::TPtr& ev) {
+ ICPROXY_PROFILED;
+
+ Y_VERIFY(ev->Sender != IncomingHandshakeActor);
+ Y_VERIFY(ev->Sender != OutgoingHandshakeActor);
+ LogHandshakeFail(ev, true);
+ }
+
+ const char* State = nullptr;
+ TInstant StateSwitchTime;
+
+ template <typename... TArgs>
+ void SwitchToState(int line, const char* name, TArgs&&... args) {
+ ICPROXY_PROFILED;
+
+ LOG_DEBUG_IC("ICP77", "@%d %s -> %s", line, State, name);
+ State = name;
+ StateSwitchTime = TActivationContext::Now();
+ Become(std::forward<TArgs>(args)...);
+ Y_VERIFY(!Terminated || CurrentStateFunc() == &TThis::HoldByError); // ensure we never escape this state
+ if (CurrentStateFunc() != &TThis::PendingActivation) {
+ PassAwayTimestamp = TInstant::Max();
+ }
+ }
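+
+        // Note: SwitchToState callers pass __LINE__ as `line`, so the ICP77 log record above pinpoints the
+        // exact transition site in this file.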
+
+ TInstant PassAwayTimestamp;
+ bool PassAwayScheduled = false;
+
+ void SwitchToInitialState() {
+ ICPROXY_PROFILED;
+
+ Y_VERIFY(!PendingSessionEvents && !PendingIncomingHandshakeEvents, "%s PendingSessionEvents# %zu"
+ " PendingIncomingHandshakeEvents# %zu State# %s", LogPrefix.data(), PendingSessionEvents.size(),
+ PendingIncomingHandshakeEvents.size(), State);
+ SwitchToState(__LINE__, "PendingActivation", &TThis::PendingActivation);
+ if (DynamicPtr && !PassAwayScheduled && PassAwayTimestamp != TInstant::Max()) {
+ TActivationContext::Schedule(PassAwayTimestamp, new IEventHandle(EvPassAwayIfNeeded, 0, SelfId(),
+ {}, nullptr, 0));
+ PassAwayScheduled = true;
+ }
+ }
+
+ void HandlePassAwayIfNeeded() {
+ Y_VERIFY(PassAwayScheduled);
+ if (PassAwayTimestamp != TInstant::Max()) {
+ PassAway();
+ }
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // PendingActivation
+ //
+ // In this state we are just waiting for some activities, which may include:
+ // * an external Session event
+ // * incoming handshake request
+ //
+        // Upon receiving such an event, we put it into the corresponding queue and initiate startup by calling
+        // RequestNodeInfo (or RequestNodeInfoForIncomingHandshake), which sends TEvGetNode to the nameservice and
+        // arms a timer to handle the timeout (which should not occur, but we want to be sure we don't hang here),
+        // and then switches to the PendingNodeInfo state.
+
+ PROXY_STFUNC(PendingActivation,
+ RequestNodeInfo, // Session events
+ RequestNodeInfoForIncomingHandshake, // Incoming handshake requests
+ Ignore, // Handshake status
+ Ignore, // Disconnect request
+ Ignore, // Wakeup
+ Ignore // Node info
+ )
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // PendingNodeInfo
+ //
+        // This state is entered when we have asked the nameserver to provide a description of the peer node we are
+        // working with. All external Session events and incoming handshake requests are enqueued into their respective
+        // queues; TEvNodeInfo is the main event that triggers processing. On success, we try to initiate an outgoing
+        // handshake if needed, or process incoming handshakes. On error, we enter the HoldByError state.
+        //
+        // NOTE: handshake status events are also enqueued, as the handshake actor may have generated a failure event
+        // (due to a timeout or some other reason) without waiting for an acknowledgement, and it must be processed
+        // correctly to prevent the session from hanging.
+
+ PROXY_STFUNC(PendingNodeInfo,
+ EnqueueSessionEvent, // Session events
+ EnqueueIncomingHandshakeEvent, // Incoming handshake requests
+ EnqueueIncomingHandshakeEvent, // Handshake status
+ Disconnect, // Disconnect request
+ ConfigureTimeout, // Wakeup
+ Configure // Node info
+ )
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // PendingConnection
+ //
+        // Here we have issued an outgoing handshake, accepted an incoming one, or possibly both, and we are waiting
+        // for the handshake status. When one of the handshakes finishes, we use its status to establish the connection
+        // (or to go to the error state). When one handshake terminates with an error while the other is still running,
+        // we keep waiting for the second one to finish.
+
+ PROXY_STFUNC(PendingConnection,
+ EnqueueSessionEvent, // Session events
+ IncomingHandshake, // Incoming handshake requests
+ HandleHandshakeStatus, // Handshake status
+ Disconnect, // Disconnect request
+ Ignore, // Wakeup
+ Ignore // Node info
+ )
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // StateWork
+ //
+        // We have an established session and forward any incoming messages to it. Incoming handshakes are still
+        // accepted concurrently and applied when finished.
+
+ PROXY_STFUNC(StateWork,
+ ForwardSessionEventToSession, // Session events
+ IncomingHandshake, // Incoming handshake requests
+ HandleHandshakeStatus, // Handshake status
+ Disconnect, // Disconnect request
+ Ignore, // Wakeup
+ Ignore // Node info
+ )
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // HoldByError
+ //
+        // When something bad happens to the connection, we sleep in this state. After waking up we go back to
+        // PendingActivation.
+
+ PROXY_STFUNC(HoldByError,
+ DropSessionEvent, // Session events
+ RequestNodeInfoForIncomingHandshake, // Incoming handshake requests
+ Ignore, // Handshake status
+ Ignore, // Disconnect request
+ WakeupFromErrorState, // Wakeup
+ Ignore // Node info
+ )
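+
+        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+        // An informal sketch of the transitions between the states above, derived from the handlers (informative
+        // only, not exhaustive):
+        //
+        //   PendingActivation --(session event / incoming handshake)--> PendingNodeInfo
+        //   PendingNodeInfo   --(TEvNodeInfo ok)--> PendingConnection; --(error or timeout)--> HoldByError
+        //   PendingConnection --(TEvHandshakeDone)--> StateWork; --(all handshakes failed)--> PendingActivation | HoldByError
+        //   StateWork         --(session unregistered)--> PendingConnection | PendingActivation
+        //   HoldByError       --(wakeup)--> PendingActivation; --(incoming handshake)--> PendingNodeInfo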
+
+#undef SESSION_EVENTS
+#undef INCOMING_HANDSHAKE_EVENTS
+#undef HANDSHAKE_STATUS_EVENTS
+#undef PROXY_STFUNC
+
+ void ForwardSessionEventToSession(STATEFN_SIG);
+ void EnqueueSessionEvent(STATEFN_SIG);
+
+        // Incoming handshake handlers, including a special wrapper for when IncomingHandshake is used as an fFunc
+ void IncomingHandshake(STATEFN_SIG) {
+ switch (ev->GetTypeRewrite()) {
+ hFunc(TEvHandshakeAsk, IncomingHandshake);
+ hFunc(TEvHandshakeRequest, IncomingHandshake);
+ default:
+ Y_FAIL();
+ }
+ }
+ void IncomingHandshake(TEvHandshakeAsk::TPtr& ev);
+ void IncomingHandshake(TEvHandshakeRequest::TPtr& ev);
+
+ void RequestNodeInfo(STATEFN_SIG);
+ void RequestNodeInfoForIncomingHandshake(STATEFN_SIG);
+
+ void StartInitialHandshake();
+ void StartResumeHandshake(ui64 inputCounter);
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // Incoming handshake event queue processing
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ void EnqueueIncomingHandshakeEvent(STATEFN_SIG);
+ void EnqueueIncomingHandshakeEvent(TEvHandshakeDone::TPtr& ev);
+ void EnqueueIncomingHandshakeEvent(TEvHandshakeFail::TPtr& ev);
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // PendingNodeInfo
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ IEventBase* ConfigureTimeoutCookie; // pointer to the scheduled event used to match sent and received events
+
+ void StartConfiguring();
+ void Configure(TEvInterconnect::TEvNodeInfo::TPtr& ev);
+ void ConfigureTimeout(TEvents::TEvWakeup::TPtr& ev);
+ void ProcessConfigured();
+
+ void HandleHandshakeStatus(TEvHandshakeDone::TPtr& ev);
+ void HandleHandshakeStatus(TEvHandshakeFail::TPtr& ev);
+
+ void TransitToErrorState(TString Explanation, bool updateErrorLog = true);
+ void WakeupFromErrorState(TEvents::TEvWakeup::TPtr& ev);
+ void Disconnect();
+
+ const ui32 PeerNodeId;
+ IActor **DynamicPtr;
+
+ void ValidateEvent(TAutoPtr<IEventHandle>& ev, const char* func) {
+ if (SelfId().NodeId() == PeerNodeId) {
+ TString msg = Sprintf("Event Type# 0x%08" PRIx32 " TypeRewrite# 0x%08" PRIx32
+ " from Sender# %s sent to the proxy for the node itself via Interconnect;"
+ " THIS IS NOT A BUG IN INTERCONNECT, check the event sender instead",
+ ev->Type, ev->GetTypeRewrite(), ev->Sender.ToString().data());
+ LOG_ERROR_IC("ICP03", "%s", msg.data());
+ Y_VERIFY_DEBUG(false, "%s", msg.data());
+ }
+
+ Y_VERIFY(ev->GetTypeRewrite() != TEvInterconnect::EvForward || ev->Recipient.NodeId() == PeerNodeId,
+ "Recipient/Proxy NodeId mismatch Recipient# %s Type# 0x%08" PRIx32 " PeerNodeId# %" PRIu32 " Func# %s",
+ ev->Recipient.ToString().data(), ev->Type, PeerNodeId, func);
+ }
+
+        // Shared, read-only configuration: all proxy actors reference the same common object
+ TInterconnectProxyCommon::TPtr const Common;
+
+ const TActorId& GetNameserviceId() const {
+ return Common->NameserviceId;
+ }
+
+ TString TechnicalPeerHostName;
+
+ std::shared_ptr<IInterconnectMetrics> Metrics;
+
+ void HandleClosePeerSocket();
+ void HandleCloseInputSession();
+ void HandlePoisonSession();
+
+ void HandleSessionBufferSizeRequest(TEvSessionBufferSizeRequest::TPtr& ev);
+
+ bool CleanupEventQueueScheduled = false;
+ void ScheduleCleanupEventQueue();
+ void HandleCleanupEventQueue();
+ void CleanupEventQueue();
+
+ // hold all events before connection is established
+ struct TPendingSessionEvent {
+ TInstant Deadline;
+ ui32 Size;
+ THolder<IEventHandle> Event;
+
+ TPendingSessionEvent(TInstant deadline, ui32 size, TAutoPtr<IEventHandle> event)
+ : Deadline(deadline)
+ , Size(size)
+ , Event(event)
+ {}
+ };
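+        // Each held event is accounted by its serialized size. CleanupEventQueue (above) drops events whose
+        // Deadline has passed, and keeps dropping while the queue exceeds Settings.MessagePendingSize.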
+ TDeque<TPendingSessionEvent> PendingSessionEvents;
+ ui64 PendingSessionEventsSize = 0;
+ void ProcessPendingSessionEvents();
+ void DropSessionEvent(STATEFN_SIG);
+
+ TInterconnectSessionTCP* Session = nullptr;
+ TActorId SessionID;
+
+        // virtual ids used during handshake to check whether a connection belongs to the same
+        // session or to find out which handshake is the latest; they are "virtual" because the
+        // session actor appears only after a successful handshake
+ TActorId SessionVirtualId;
+ TActorId RemoteSessionVirtualId;
+
+ TActorId GenerateSessionVirtualId() {
+ ICPROXY_PROFILED;
+
+ const ui64 localId = TlsActivationContext->ExecutorThread.ActorSystem->AllocateIDSpace(1);
+ return NActors::TActorId(SelfId().NodeId(), 0, localId, 0);
+ }
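+
+        // The generated id is "virtual": it combines the local node id with a freshly allocated 64-bit
+        // LocalId and has no real mailbox behind it; note that its LocalId also doubles as the serial
+        // sent in incoming handshake replies.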
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ TActorId IncomingHandshakeActor;
+ TInstant IncomingHandshakeActorFilledIn;
+ TInstant IncomingHandshakeActorReset;
+ TMaybe<ui64> LastSerialFromIncomingHandshake;
+ THolder<IEventBase> HeldHandshakeReply;
+
+ void DropIncomingHandshake(bool poison = true) {
+ ICPROXY_PROFILED;
+
+ if (const TActorId& actorId = std::exchange(IncomingHandshakeActor, TActorId())) {
+ LOG_DEBUG_IC("ICP111", "dropped incoming handshake: %s poison: %s", actorId.ToString().data(),
+ poison ? "true" : "false");
+ if (poison) {
+ Send(actorId, new TEvents::TEvPoisonPill);
+ }
+ LastSerialFromIncomingHandshake.Clear();
+ HeldHandshakeReply.Reset();
+ IncomingHandshakeActorReset = TActivationContext::Now();
+ }
+ }
+
+ void DropOutgoingHandshake(bool poison = true) {
+ ICPROXY_PROFILED;
+
+ if (const TActorId& actorId = std::exchange(OutgoingHandshakeActor, TActorId())) {
+ LOG_DEBUG_IC("ICP112", "dropped outgoing handshake: %s poison: %s", actorId.ToString().data(),
+ poison ? "true" : "false");
+ if (poison) {
+ Send(actorId, new TEvents::TEvPoisonPill);
+ }
+ OutgoingHandshakeActorReset = TActivationContext::Now();
+ }
+ }
+
+ void DropHandshakes() {
+ ICPROXY_PROFILED;
+
+ DropIncomingHandshake();
+ DropOutgoingHandshake();
+ }
+
+ void PrepareNewSessionHandshake() {
+ ICPROXY_PROFILED;
+
+ // drop existing session if we have one
+ if (Session) {
+ LOG_INFO_IC("ICP04", "terminating current session as we are negotiating a new one");
+ IActor::InvokeOtherActor(*Session, &TInterconnectSessionTCP::Terminate, TDisconnectReason::NewSession());
+ }
+
+ // ensure we have no current session
+ Y_VERIFY(!Session);
+
+ // switch to pending connection state -- we wait for handshakes, we want more handshakes!
+ SwitchToState(__LINE__, "PendingConnection", &TThis::PendingConnection);
+ }
+
+ void IssueIncomingHandshakeReply(const TActorId& handshakeId, ui64 peerLocalId,
+ THolder<IEventBase> event);
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ TActorId OutgoingHandshakeActor;
+ TInstant OutgoingHandshakeActorCreated;
+ TInstant OutgoingHandshakeActorReset;
+
+ TInstant LastSessionDieTime;
+
+ void GenerateHttpInfo(NMon::TEvHttpInfo::TPtr& ev);
+
+ void Handle(TEvQueryStats::TPtr& ev);
+
+ TDuration HoldByErrorWakeupDuration = TDuration::Zero();
+ TEvents::TEvWakeup* HoldByErrorWakeupCookie;
+
+ THolder<TProgramInfo> RemoteProgramInfo;
+ NInterconnect::TSecureSocketContext::TPtr SecureContext;
+
+ void Handle(TEvGetSecureSocket::TPtr ev) {
+ auto socket = MakeIntrusive<NInterconnect::TSecureSocket>(*ev->Get()->Socket, SecureContext);
+ Send(ev->Sender, new TEvSecureSocket(std::move(socket)));
+ }
+
+ TDeque<THolder<IEventHandle>> PendingIncomingHandshakeEvents;
+
+ TDeque<std::tuple<TInstant, TString, TString, ui32>> ErrorStateLog;
+
+ void UpdateErrorStateLog(TInstant now, TString kind, TString explanation) {
+ ICPROXY_PROFILED;
+
+ if (ErrorStateLog) {
+ auto& back = ErrorStateLog.back();
+ if (kind == std::get<1>(back) && explanation == std::get<2>(back)) {
+ std::get<0>(back) = now;
+ ++std::get<3>(back);
+ return;
+ }
+ }
+
+ ErrorStateLog.emplace_back(now, std::move(kind), std::move(explanation), 1);
+ if (ErrorStateLog.size() > 20) {
+ ErrorStateLog.pop_front();
+ }
+ }
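+
+        // For example, a burst of identical "transient w/o session" failures collapses into a single log
+        // entry whose repetition counter is rendered as "x<N>" on the monitoring page (see GenerateHttpInfo).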
+
+ void LogHandshakeFail(TEvHandshakeFail::TPtr& ev, bool inconclusive);
+
+ bool Terminated = false;
+ void HandleTerminate();
+
+ void PassAway() override;
+ };
+
+}
diff --git a/library/cpp/actors/interconnect/interconnect_tcp_server.cpp b/library/cpp/actors/interconnect/interconnect_tcp_server.cpp
new file mode 100644
index 0000000000..b95c994598
--- /dev/null
+++ b/library/cpp/actors/interconnect/interconnect_tcp_server.cpp
@@ -0,0 +1,117 @@
+#include "interconnect_tcp_server.h"
+#include "interconnect_handshake.h"
+
+#include <library/cpp/actors/core/log.h>
+#include <library/cpp/actors/protos/services_common.pb.h>
+
+#include "interconnect_common.h"
+
+namespace NActors {
+ TInterconnectListenerTCP::TInterconnectListenerTCP(const TString& address, ui16 port, TInterconnectProxyCommon::TPtr common, const TMaybe<SOCKET>& socket)
+ : TActor(&TThis::Initial)
+ , TInterconnectLoggingBase(Sprintf("ICListener: %s", SelfId().ToString().data()))
+ , Address(address.c_str(), port)
+ , Listener(
+ socket
+ ? new NInterconnect::TStreamSocket(*socket)
+ : nullptr)
+ , ExternalSocket(!!Listener)
+ , ProxyCommonCtx(std::move(common))
+ {
+ if (ExternalSocket) {
+ SetNonBlock(*Listener);
+ }
+ }
+
+ TAutoPtr<IEventHandle> TInterconnectListenerTCP::AfterRegister(const TActorId& self, const TActorId& parentId) {
+ return new IEventHandle(self, parentId, new TEvents::TEvBootstrap, 0);
+ }
+
+ void TInterconnectListenerTCP::Die(const TActorContext& ctx) {
+ LOG_DEBUG_IC("ICL08", "Dying");
+ TActor::Die(ctx);
+ }
+
+ int TInterconnectListenerTCP::Bind() {
+ NInterconnect::TAddress addr = Address;
+
+ if (ProxyCommonCtx->Settings.BindOnAllAddresses) {
+ switch (addr.GetFamily()) {
+ case AF_INET: {
+ auto *sa = reinterpret_cast<sockaddr_in*>(addr.SockAddr());
+ sa->sin_addr = {INADDR_ANY};
+ break;
+ }
+
+ case AF_INET6: {
+ auto *sa = reinterpret_cast<sockaddr_in6*>(addr.SockAddr());
+ sa->sin6_addr = in6addr_any;
+ break;
+ }
+
+ default:
+ Y_FAIL("Unsupported address family");
+ }
+ }
+
+ Listener = NInterconnect::TStreamSocket::Make(addr.GetFamily());
+ if (*Listener == -1) {
+ return errno;
+ }
+ SetNonBlock(*Listener);
+ Listener->SetSendBufferSize(ProxyCommonCtx->Settings.GetSendBufferSize()); // TODO(alexvru): WTF?
+ SetSockOpt(*Listener, SOL_SOCKET, SO_REUSEADDR, 1);
+ if (const auto e = -Listener->Bind(addr)) {
+ return e;
+ } else if (const auto e = -Listener->Listen(SOMAXCONN)) {
+ return e;
+ } else {
+ return 0;
+ }
+ }
+
+ void TInterconnectListenerTCP::Bootstrap(const TActorContext& ctx) {
+ if (!Listener) {
+ if (const int err = Bind()) {
+ LOG_ERROR_IC("ICL01", "Bind failed: %s (%s)", strerror(err), Address.ToString().data());
+ Listener.Reset();
+ Become(&TThis::Initial, TDuration::Seconds(1), new TEvents::TEvBootstrap);
+ return;
+ }
+ }
+ if (const auto& callback = ProxyCommonCtx->InitWhiteboard) {
+ callback(Address.GetPort(), TlsActivationContext->ExecutorThread.ActorSystem);
+ }
+ const bool success = ctx.Send(MakePollerActorId(), new TEvPollerRegister(Listener, SelfId(), {}));
+ Y_VERIFY(success);
+ Become(&TThis::Listen);
+ }
+
+ void TInterconnectListenerTCP::Handle(TEvPollerRegisterResult::TPtr ev, const TActorContext& ctx) {
+ PollerToken = std::move(ev->Get()->PollerToken);
+ Process(ctx);
+ }
+
+ void TInterconnectListenerTCP::Process(const TActorContext& ctx) {
+ for (;;) {
+ NInterconnect::TAddress address;
+ const int r = Listener->Accept(address);
+ if (r >= 0) {
+ LOG_DEBUG_IC("ICL04", "Accepted from: %s", address.ToString().data());
+ auto socket = MakeIntrusive<NInterconnect::TStreamSocket>(static_cast<SOCKET>(r));
+ ctx.Register(CreateIncomingHandshakeActor(ProxyCommonCtx, std::move(socket)));
+ continue;
+ } else if (-r != EAGAIN && -r != EWOULDBLOCK) {
+ Y_VERIFY(-r != ENFILE && -r != EMFILE && !ExternalSocket);
+ LOG_ERROR_IC("ICL06", "Listen failed: %s (%s)", strerror(-r), Address.ToString().data());
+ Listener.Reset();
+ PollerToken.Reset();
+ Become(&TThis::Initial, TDuration::Seconds(1), new TEvents::TEvBootstrap);
+ } else if (PollerToken) {
+ PollerToken->Request(true, false);
+ }
+ break;
+ }
+ }
+
+}
diff --git a/library/cpp/actors/interconnect/interconnect_tcp_server.h b/library/cpp/actors/interconnect/interconnect_tcp_server.h
new file mode 100644
index 0000000000..fc71073c2d
--- /dev/null
+++ b/library/cpp/actors/interconnect/interconnect_tcp_server.h
@@ -0,0 +1,57 @@
+#pragma once
+
+#include <library/cpp/actors/core/hfunc.h>
+#include <library/cpp/actors/core/event_pb.h>
+#include <library/cpp/actors/core/events.h>
+
+#include "interconnect_common.h"
+#include "poller_actor.h"
+#include "events_local.h"
+
+namespace NActors {
+ class TInterconnectListenerTCP: public TActor<TInterconnectListenerTCP>, public TInterconnectLoggingBase {
+ public:
+ static constexpr EActivityType ActorActivityType() {
+ return INTERCONNECT_COMMON;
+ }
+
+ TInterconnectListenerTCP(const TString& address, ui16 port, TInterconnectProxyCommon::TPtr common, const TMaybe<SOCKET>& socket = Nothing());
+ int Bind();
+
+ private:
+ STFUNC(Initial) {
+ switch (ev->GetTypeRewrite()) {
+ CFunc(TEvents::TEvBootstrap::EventType, Bootstrap);
+ CFunc(TEvents::TEvPoisonPill::EventType, Die);
+ }
+ }
+
+ STFUNC(Listen) {
+ switch (ev->GetTypeRewrite()) {
+ CFunc(TEvents::TEvPoisonPill::EventType, Die);
+ HFunc(TEvPollerRegisterResult, Handle);
+ CFunc(TEvPollerReady::EventType, Process);
+ }
+ }
+
+ TAutoPtr<IEventHandle> AfterRegister(const TActorId& self, const TActorId& parentId) override;
+
+ void Die(const TActorContext& ctx) override;
+
+ void Bootstrap(const TActorContext& ctx);
+ void Handle(TEvPollerRegisterResult::TPtr ev, const TActorContext& ctx);
+
+ void Process(const TActorContext& ctx);
+
+ const NInterconnect::TAddress Address;
+ TIntrusivePtr<NInterconnect::TStreamSocket> Listener;
+ const bool ExternalSocket;
+ TPollerToken::TPtr PollerToken;
+ TInterconnectProxyCommon::TPtr const ProxyCommonCtx;
+ };
+
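+    // Well-known service ActorId of the listener: a node-local id with the 12-byte name "ICListener/S"
+    // (static nodes) or "ICListener/D" (dynamic nodes); presumably used wherever the listener has to be
+    // addressed as a local service actor.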
+ static inline TActorId MakeInterconnectListenerActorId(bool dynamic) {
+ char x[12] = {'I', 'C', 'L', 'i', 's', 't', 'e', 'n', 'e', 'r', '/', dynamic ? 'D' : 'S'};
+ return TActorId(0, TStringBuf(x, 12));
+ }
+}
diff --git a/library/cpp/actors/interconnect/interconnect_tcp_session.cpp b/library/cpp/actors/interconnect/interconnect_tcp_session.cpp
new file mode 100644
index 0000000000..2ded7f9f53
--- /dev/null
+++ b/library/cpp/actors/interconnect/interconnect_tcp_session.cpp
@@ -0,0 +1,1228 @@
+#include "interconnect_tcp_proxy.h"
+#include "interconnect_tcp_session.h"
+#include "interconnect_handshake.h"
+
+#include <library/cpp/actors/core/probes.h>
+#include <library/cpp/actors/core/log.h>
+#include <library/cpp/actors/core/interconnect.h>
+#include <library/cpp/actors/util/datetime.h>
+#include <library/cpp/actors/protos/services_common.pb.h>
+#include <library/cpp/monlib/service/pages/templates.h>
+
+namespace NActors {
+ LWTRACE_USING(ACTORLIB_PROVIDER);
+
+ DECLARE_WILSON_EVENT(OutputQueuePush, (ui32, QueueSizeInEvents), (ui64, QueueSizeInBytes));
+
+ template<typename T>
+ T Coalesce(T&& x) {
+ return x;
+ }
+
+ template<typename T, typename T2, typename... TRest>
+ typename std::common_type<T, T2, TRest...>::type Coalesce(T&& first, T2&& mid, TRest&&... rest) {
+ if (first != typename std::remove_reference<T>::type()) {
+ return first;
+ } else {
+ return Coalesce(std::forward<T2>(mid), std::forward<TRest>(rest)...);
+ }
+ }
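+
+    // Usage sketch (illustrative, hypothetical names): Coalesce(userTimeout, configTimeout, TDuration::Seconds(1))
+    // returns the first argument that differs from its type's default-constructed value, so an explicitly
+    // set timeout wins over the fallbacks.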
+
+ TInterconnectSessionTCP::TInterconnectSessionTCP(TInterconnectProxyTCP* const proxy, TSessionParams params)
+ : TActor(&TInterconnectSessionTCP::StateFunc)
+ , Created(TInstant::Now())
+ , Proxy(proxy)
+ , CloseOnIdleWatchdog(GetCloseOnIdleTimeout(), std::bind(&TThis::OnCloseOnIdleTimerHit, this))
+ , LostConnectionWatchdog(GetLostConnectionTimeout(), std::bind(&TThis::OnLostConnectionTimerHit, this))
+ , Params(std::move(params))
+ , TotalOutputQueueSize(0)
+ , OutputStuckFlag(false)
+ , OutputQueueUtilization(16)
+ , OutputCounter(0ULL)
+ {
+ Proxy->Metrics->SetConnected(0);
+ ReceiveContext.Reset(new TReceiveContext);
+ }
+
+ TInterconnectSessionTCP::~TInterconnectSessionTCP() {
+ // close socket ASAP when actor system is being shut down
+ if (Socket) {
+ Socket->Shutdown(SHUT_RDWR);
+ }
+ }
+
+ void TInterconnectSessionTCP::Init() {
+ auto destroyCallback = [as = TlsActivationContext->ExecutorThread.ActorSystem, id = Proxy->Common->DestructorId](THolder<IEventBase> event) {
+ as->Send(id, event.Release());
+ };
+ Pool.ConstructInPlace(Proxy->Common, std::move(destroyCallback));
+ ChannelScheduler.ConstructInPlace(Proxy->PeerNodeId, Proxy->Common->ChannelsConfig, Proxy->Metrics, *Pool,
+ Proxy->Common->Settings.MaxSerializedEventSize, Params);
+
+ LOG_INFO(*TlsActivationContext, NActorsServices::INTERCONNECT_STATUS, "[%u] session created", Proxy->PeerNodeId);
+ SetPrefix(Sprintf("Session %s [node %" PRIu32 "]", SelfId().ToString().data(), Proxy->PeerNodeId));
+ SendUpdateToWhiteboard();
+ }
+
+ void TInterconnectSessionTCP::CloseInputSession() {
+ Send(ReceiverId, new TEvInterconnect::TEvCloseInputSession);
+ }
+
+ void TInterconnectSessionTCP::Handle(TEvTerminate::TPtr& ev) {
+ Terminate(ev->Get()->Reason);
+ }
+
+ void TInterconnectSessionTCP::HandlePoison() {
+ Terminate(TDisconnectReason());
+ }
+
+ void TInterconnectSessionTCP::Terminate(TDisconnectReason reason) {
+ LOG_INFO_IC_SESSION("ICS01", "socket: %" PRIi64, (Socket ? i64(*Socket) : -1));
+
+ IActor::InvokeOtherActor(*Proxy, &TInterconnectProxyTCP::UnregisterSession, this);
+ ShutdownSocket(std::move(reason));
+
+ for (const auto& kv : Subscribers) {
+ Send(kv.first, new TEvInterconnect::TEvNodeDisconnected(Proxy->PeerNodeId), 0, kv.second);
+ }
+ Proxy->Metrics->SubSubscribersCount(Subscribers.size());
+ Subscribers.clear();
+
+ ChannelScheduler->ForEach([&](TEventOutputChannel& channel) {
+ channel.NotifyUndelivered();
+ });
+
+ if (ReceiverId) {
+ Send(ReceiverId, new TEvents::TEvPoisonPill);
+ }
+
+ SendUpdateToWhiteboard(false);
+
+ Proxy->Metrics->SubOutputBuffersTotalSize(TotalOutputQueueSize);
+ Proxy->Metrics->SubInflightDataAmount(InflightDataAmount);
+
+ LOG_INFO(*TlsActivationContext, NActorsServices::INTERCONNECT_STATUS, "[%u] session destroyed", Proxy->PeerNodeId);
+
+ TActor::PassAway();
+ }
+
+ void TInterconnectSessionTCP::PassAway() {
+ Y_FAIL("TInterconnectSessionTCP::PassAway() can't be called directly");
+ }
+
+ void TInterconnectSessionTCP::Forward(STATEFN_SIG) {
+ Proxy->ValidateEvent(ev, "Forward");
+
+ LOG_DEBUG_IC_SESSION("ICS02", "send event from: %s to: %s", ev->Sender.ToString().data(), ev->Recipient.ToString().data());
+ ++MessagesGot;
+
+ if (ev->Flags & IEventHandle::FlagSubscribeOnSession) {
+ Subscribe(ev);
+ }
+
+ ui16 evChannel = ev->GetChannel();
+ auto& oChannel = ChannelScheduler->GetOutputChannel(evChannel);
+ const bool wasWorking = oChannel.IsWorking();
+
+ const auto [dataSize, event] = oChannel.Push(*ev);
+ LWTRACK(ForwardEvent, event->Orbit, Proxy->PeerNodeId, event->Descr.Type, event->Descr.Flags, LWACTORID(event->Descr.Recipient), LWACTORID(event->Descr.Sender), event->Descr.Cookie, event->EventSerializedSize);
+
+ TotalOutputQueueSize += dataSize;
+ Proxy->Metrics->AddOutputBuffersTotalSize(dataSize);
+ if (!wasWorking) {
+            // this channel has returned to work -- it was empty and we have just put the first event into its queue
+ ChannelScheduler->AddToHeap(oChannel, EqualizeCounter);
+ }
+
+ SetOutputStuckFlag(true);
+ ++NumEventsInReadyChannels;
+
+ LWTRACK(EnqueueEvent, event->Orbit, Proxy->PeerNodeId, NumEventsInReadyChannels, GetWriteBlockedTotal(), evChannel, oChannel.GetQueueSize(), oChannel.GetBufferedAmountOfData());
+ WILSON_TRACE(*TlsActivationContext, &ev->TraceId, OutputQueuePush,
+ QueueSizeInEvents = oChannel.GetQueueSize(),
+ QueueSizeInBytes = oChannel.GetBufferedAmountOfData());
+
+ // check for overloaded queues
+ ui64 sendBufferDieLimit = Proxy->Common->Settings.SendBufferDieLimitInMB * ui64(1 << 20);
+ if (sendBufferDieLimit != 0 && TotalOutputQueueSize > sendBufferDieLimit) {
+ LOG_ERROR_IC_SESSION("ICS03", "socket: %" PRIi64 " output queue is overloaded, actual %" PRIu64 " bytes, limit is %" PRIu64,
+ Socket ? i64(*Socket) : -1, TotalOutputQueueSize, sendBufferDieLimit);
+ return Terminate(TDisconnectReason::QueueOverload());
+ }
+
+ ui64 outputBuffersTotalSizeLimit = Proxy->Common->Settings.OutputBuffersTotalSizeLimitInMB * ui64(1 << 20);
+ if (outputBuffersTotalSizeLimit != 0 && static_cast<ui64>(Proxy->Metrics->GetOutputBuffersTotalSize()) > outputBuffersTotalSizeLimit) {
+ LOG_ERROR_IC_SESSION("ICS77", "Exceeded total limit on output buffers size");
+ if (AtomicTryLock(&Proxy->Common->StartedSessionKiller)) {
+ CreateSessionKillingActor(Proxy->Common);
+ }
+ }
+
+ if (RamInQueue && !RamInQueue->Batching) {
+ // we have pending TEvRam, so GenerateTraffic will be called no matter what
+ } else if (InflightDataAmount >= GetTotalInflightAmountOfData() || !Socket || ReceiveContext->WriteBlockedByFullSendBuffer) {
+ // we can't issue more traffic now; GenerateTraffic will be called upon unblocking
+ } else if (TotalOutputQueueSize >= 64 * 1024) {
+            // the output queue is already large enough to issue some traffic right away
+ GenerateTraffic();
+ } else if (!RamInQueue) {
+ Y_VERIFY_DEBUG(NumEventsInReadyChannels == 1);
+ RamInQueue = new TEvRam(true);
+ auto *ev = new IEventHandle(SelfId(), {}, RamInQueue);
+ const TDuration batchPeriod = Proxy->Common->Settings.BatchPeriod;
+ if (batchPeriod != TDuration()) {
+ TActivationContext::Schedule(batchPeriod, ev);
+ } else {
+ TActivationContext::Send(ev);
+ }
+ LWPROBE(StartBatching, Proxy->PeerNodeId, batchPeriod.MillisecondsFloat());
+ LOG_DEBUG_IC_SESSION("ICS17", "batching started");
+ }
+ }
+
+ void TInterconnectSessionTCP::Subscribe(STATEFN_SIG) {
+ LOG_DEBUG_IC_SESSION("ICS04", "subscribe for session state for %s", ev->Sender.ToString().data());
+ const auto [it, inserted] = Subscribers.emplace(ev->Sender, ev->Cookie);
+ if (inserted) {
+ Proxy->Metrics->IncSubscribersCount();
+ } else {
+ it->second = ev->Cookie;
+ }
+ Send(ev->Sender, new TEvInterconnect::TEvNodeConnected(Proxy->PeerNodeId), 0, ev->Cookie);
+ }
+
+ void TInterconnectSessionTCP::Unsubscribe(STATEFN_SIG) {
+ LOG_DEBUG_IC_SESSION("ICS05", "unsubscribe for session state for %s", ev->Sender.ToString().data());
+        Proxy->Metrics->SubSubscribersCount(Subscribers.erase(ev->Sender));
+ }
+
+ THolder<TEvHandshakeAck> TInterconnectSessionTCP::ProcessHandshakeRequest(TEvHandshakeAsk::TPtr& ev) {
+ TEvHandshakeAsk *msg = ev->Get();
+
+ // close existing input session, if any, and do nothing upon its destruction
+ ReestablishConnection({}, false, TDisconnectReason::NewSession());
+ const ui64 lastInputSerial = ReceiveContext->LockLastProcessedPacketSerial();
+
+ LOG_INFO_IC_SESSION("ICS08", "incoming handshake Self# %s Peer# %s Counter# %" PRIu64 " LastInputSerial# %" PRIu64,
+ msg->Self.ToString().data(), msg->Peer.ToString().data(), msg->Counter, lastInputSerial);
+
+ return MakeHolder<TEvHandshakeAck>(msg->Peer, lastInputSerial, Params);
+ }
+
+ void TInterconnectSessionTCP::SetNewConnection(TEvHandshakeDone::TPtr& ev) {
+ if (ReceiverId) {
+            // upon destruction of the input session actor this callback will be invoked again
+ ReestablishConnection(std::move(ev), false, TDisconnectReason::NewSession());
+ return;
+ }
+
+ LOG_INFO_IC_SESSION("ICS09", "handshake done sender: %s self: %s peer: %s socket: %" PRIi64,
+ ev->Sender.ToString().data(), ev->Get()->Self.ToString().data(), ev->Get()->Peer.ToString().data(),
+ i64(*ev->Get()->Socket));
+
+ NewConnectionSet = TActivationContext::Now();
+ PacketsWrittenToSocket = 0;
+
+ SendBufferSize = ev->Get()->Socket->GetSendBufferSize();
+ Socket = std::move(ev->Get()->Socket);
+
+        // there may be a race between LastConfirmed and the NextPacket value from the handshake, so take the maximum
+ const ui64 nextPacket = Max(LastConfirmed, ev->Get()->NextPacket);
+
+ // arm watchdogs
+ CloseOnIdleWatchdog.Arm(SelfId());
+
+ // reset activity timestamps
+ LastInputActivityTimestamp = LastPayloadActivityTimestamp = TActivationContext::Now();
+
+ LOG_INFO_IC_SESSION("ICS10", "traffic start");
+
+ // create input session actor
+ auto actor = MakeHolder<TInputSessionTCP>(SelfId(), Socket, ReceiveContext, Proxy->Common,
+ Proxy->Metrics, Proxy->PeerNodeId, nextPacket, GetDeadPeerTimeout(), Params);
+ ReceiveContext->UnlockLastProcessedPacketSerial();
+ ReceiverId = Params.Encryption ? RegisterWithSameMailbox(actor.Release()) : Register(actor.Release(), TMailboxType::ReadAsFilled);
+
+ // register our socket in poller actor
+ LOG_DEBUG_IC_SESSION("ICS11", "registering socket in PollerActor");
+ const bool success = Send(MakePollerActorId(), new TEvPollerRegister(Socket, ReceiverId, SelfId()));
+ Y_VERIFY(success);
+ ReceiveContext->WriteBlockedByFullSendBuffer = false;
+
+ LostConnectionWatchdog.Disarm();
+ Proxy->Metrics->SetConnected(1);
+ LOG_INFO(*TlsActivationContext, NActorsServices::INTERCONNECT_STATUS, "[%u] connected", Proxy->PeerNodeId);
+
+ // arm pinger timer
+ ResetFlushLogic();
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // REINITIALIZE SEND QUEUE
+ //
+        // scan through the send queue and keep only those packets that carry data -- we will simply resend them;
+        // drop all other auxiliary packets; also reset packet metrics to zero to start sending from the beginning,
+        // and reset SendQueuePos
+
+ // drop confirmed packets first as we do not need unwanted retransmissions
+ SendQueuePos = SendQueue.end();
+ DropConfirmed(nextPacket);
+
+ for (TSendQueue::iterator it = SendQueue.begin(); it != SendQueue.end(); ) {
+ const TSendQueue::iterator next = std::next(it);
+ if (it->IsEmpty()) {
+ SendQueueCache.splice(SendQueueCache.begin(), SendQueue, it);
+ } else {
+ it->ResetBufs();
+ }
+ it = next;
+ }
+ TrimSendQueueCache();
+ SendQueuePos = SendQueue.begin();
+
+ TMaybe<ui64> s;
+ for (auto it = SendQueuePos; it != SendQueue.end(); ++it) {
+ if (!it->IsEmpty()) {
+ s = it->GetSerial();
+ }
+ }
+ const ui64 serial = s.GetOrElse(Max<ui64>());
+
+ Y_VERIFY(serial > LastConfirmed, "%s serial# %" PRIu64 " LastConfirmed# %" PRIu64, LogPrefix.data(), serial, LastConfirmed);
+ LOG_DEBUG_IC_SESSION("ICS06", "rewind SendQueue size# %zu LastConfirmed# %" PRIu64 " SendQueuePos.Serial# %" PRIu64 "\n",
+ SendQueue.size(), LastConfirmed, serial);
+
+ BytesUnwritten = 0;
+ for (const auto& packet : SendQueue) {
+ BytesUnwritten += (Params.UseModernFrame ? sizeof(TTcpPacketHeader_v2) : sizeof(TTcpPacketHeader_v1)) +
+ packet.GetDataSize();
+ }
+
+ SwitchStuckPeriod();
+
+ LastHandshakeDone = TActivationContext::Now();
+
+ RamInQueue = nullptr;
+ GenerateTraffic();
+ }
+
+ void TInterconnectSessionTCP::Handle(TEvUpdateFromInputSession::TPtr& ev) {
+ if (ev->Sender == ReceiverId) {
+ TEvUpdateFromInputSession& msg = *ev->Get();
+
+ // update ping time
+ Ping = msg.Ping;
+ LWPROBE(UpdateFromInputSession, Proxy->PeerNodeId, Ping.MillisecondsFloat());
+
+ bool needConfirm = false;
+
+ // update activity timer for dead peer checker
+ LastInputActivityTimestamp = TActivationContext::Now();
+
+ if (msg.NumDataBytes) {
+ UnconfirmedBytes += msg.NumDataBytes;
+ if (UnconfirmedBytes >= GetTotalInflightAmountOfData() / 4) {
+ needConfirm = true;
+ } else {
+ SetForcePacketTimestamp(Proxy->Common->Settings.ForceConfirmPeriod);
+ }
+
+ // reset payload watchdog that controls close-on-idle behaviour
+ LastPayloadActivityTimestamp = TActivationContext::Now();
+ CloseOnIdleWatchdog.Reset();
+ }
+
+ bool unblockedSomething = false;
+ LWPROBE_IF_TOO_LONG(SlowICDropConfirmed, Proxy->PeerNodeId, ms) {
+ unblockedSomething = DropConfirmed(msg.ConfirmedByInput);
+ }
+
+ // generate more traffic if we have unblocked state now
+ if (unblockedSomething) {
+ LWPROBE(UnblockByDropConfirmed, Proxy->PeerNodeId, NHPTimer::GetSeconds(GetCycleCountFast() - ev->SendTime) * 1000.0);
+ GenerateTraffic();
+ }
+
+ // if we haven't generated any packets, then make a lone Flush packet without any data
+ if (needConfirm && Socket) {
+ ++ConfirmPacketsForcedBySize;
+ MakePacket(false);
+ }
+
+ for (;;) {
+ switch (EUpdateState state = ReceiveContext->UpdateState) {
+ case EUpdateState::NONE:
+ case EUpdateState::CONFIRMING:
+ Y_FAIL("unexpected state");
+
+ case EUpdateState::INFLIGHT:
+ // this message we are processing was the only one in flight, so we can reset state to NONE here
+ if (ReceiveContext->UpdateState.compare_exchange_weak(state, EUpdateState::NONE)) {
+ return;
+ }
+ break;
+
+ case EUpdateState::INFLIGHT_AND_PENDING:
+                    // there are more messages pending from the input session actor, so we have to tell it to
+                    // release the next one
+ if (ReceiveContext->UpdateState.compare_exchange_weak(state, EUpdateState::CONFIRMING)) {
+ Send(ev->Sender, new TEvConfirmUpdate);
+ return;
+ }
+ break;
+ }
+ }
+ }
+ }
+
+ void TInterconnectSessionTCP::HandleRam(TEvRam::TPtr& ev) {
+ if (ev->Get() == RamInQueue) {
+ LWPROBE(FinishRam, Proxy->PeerNodeId, NHPTimer::GetSeconds(GetCycleCountFast() - ev->SendTime) * 1000.0);
+ RamInQueue = nullptr;
+ GenerateTraffic();
+ }
+ }
+
+ void TInterconnectSessionTCP::GenerateTraffic() {
+ // generate ping request, if needed
+ IssuePingRequest();
+
+ if (RamInQueue && !RamInQueue->Batching) {
+ LWPROBE(SkipGenerateTraffic, Proxy->PeerNodeId, NHPTimer::GetSeconds(GetCycleCountFast() - RamStartedCycles) * 1000.0);
+ return; // we'll do it a bit later
+ } else {
+ RamInQueue = nullptr;
+ }
+
+ LOG_DEBUG_IC_SESSION("ICS19", "GenerateTraffic");
+
+        // There is a tradeoff between fairness and efficiency.
+        // The less traffic we generate here, the less buffering there is after the fair scheduler,
+        // the fairer the system is, and the lower the latency.
+        // The more traffic we generate here, the fewer syscalls and the less actor-system overhead we incur,
+        // and the less CPU is consumed.
+ static const ui64 generateLimit = 64 * 1024;
+
+ const ui64 sizeBefore = TotalOutputQueueSize;
+ ui32 generatedPackets = 0;
+ ui64 generatedBytes = 0;
+ ui64 generateStarted = GetCycleCountFast();
+
+ // apply traffic changes
+ auto accountTraffic = [&] { ChannelScheduler->ForEach([](TEventOutputChannel& channel) { channel.AccountTraffic(); }); };
+
+        // first, we create as many data packets as the conditions allow: there must be events in the channel queues
+        // and the in-flight data amount must fit into the requested limit; as soon as one of these conditions fails,
+        // we exit the loop
+ while (Socket && NumEventsInReadyChannels && InflightDataAmount < GetTotalInflightAmountOfData() && !ReceiveContext->WriteBlockedByFullSendBuffer) {
+ if (generatedBytes >= generateLimit) {
+ // resume later but ensure that we have issued at least one packet
+ RamInQueue = new TEvRam(false);
+ Send(SelfId(), RamInQueue);
+ RamStartedCycles = GetCycleCountFast();
+ LWPROBE(StartRam, Proxy->PeerNodeId);
+ break;
+ }
+
+ try {
+ generatedBytes += MakePacket(true);
+ ++generatedPackets;
+ } catch (const TExSerializedEventTooLarge& ex) {
+ // terminate session if the event can't be serialized properly
+ accountTraffic();
+ LOG_CRIT_IC("ICS31", "serialized event Type# 0x%08" PRIx32 " is too large", ex.Type);
+ return Terminate(TDisconnectReason::EventTooLarge());
+ }
+ }
+
+ if (Socket) {
+ WriteData();
+ }
+
+ LWPROBE(GenerateTraffic, Proxy->PeerNodeId, NHPTimer::GetSeconds(GetCycleCountFast() - generateStarted) * 1000.0, sizeBefore - TotalOutputQueueSize, generatedPackets, generatedBytes);
+
+ accountTraffic();
+ EqualizeCounter += ChannelScheduler->Equalize();
+ }
+
+ void TInterconnectSessionTCP::StartHandshake() {
+ LOG_INFO_IC_SESSION("ICS15", "start handshake");
+ IActor::InvokeOtherActor(*Proxy, &TInterconnectProxyTCP::StartResumeHandshake, ReceiveContext->LockLastProcessedPacketSerial());
+ }
+
+ void TInterconnectSessionTCP::ReestablishConnectionWithHandshake(TDisconnectReason reason) {
+ ReestablishConnection({}, true, std::move(reason));
+ }
+
+ void TInterconnectSessionTCP::ReestablishConnection(TEvHandshakeDone::TPtr&& ev, bool startHandshakeOnSessionClose,
+ TDisconnectReason reason) {
+ if (Socket) {
+ LOG_INFO_IC_SESSION("ICS13", "reestablish connection");
+ ShutdownSocket(std::move(reason)); // stop sending/receiving on socket
+ PendingHandshakeDoneEvent = std::move(ev);
+ StartHandshakeOnSessionClose = startHandshakeOnSessionClose;
+ if (!ReceiverId) {
+ ReestablishConnectionExecute();
+ }
+ }
+ }
+
+ void TInterconnectSessionTCP::OnDisconnect(TEvSocketDisconnect::TPtr& ev) {
+ if (ev->Sender == ReceiverId) {
+ const bool wasConnected(Socket);
+ LOG_INFO_IC_SESSION("ICS07", "socket disconnect %" PRIi64 " reason# %s", Socket ? i64(*Socket) : -1, ev->Get()->Reason.ToString().data());
+ ReceiverId = TActorId(); // reset receiver actor id as we have no more receiver yet
+ if (wasConnected) {
+                // we were successfully connected and did not expect failure, so it arrived from the input side; we
+                // should restart the handshake process, closing our side of the socket first
+ ShutdownSocket(ev->Get()->Reason);
+ StartHandshake();
+ } else {
+ ReestablishConnectionExecute();
+ }
+ }
+ }
+
+ void TInterconnectSessionTCP::ShutdownSocket(TDisconnectReason reason) {
+ if (Socket) {
+ if (const TString& s = reason.ToString()) {
+ Proxy->Metrics->IncDisconnectByReason(s);
+ }
+
+ LOG_INFO_IC_SESSION("ICS25", "shutdown socket, reason# %s", reason.ToString().data());
+ Proxy->UpdateErrorStateLog(TActivationContext::Now(), "close_socket", reason.ToString().data());
+ Socket->Shutdown(SHUT_RDWR);
+ Socket.Reset();
+ Proxy->Metrics->IncDisconnections();
+ CloseOnIdleWatchdog.Disarm();
+ LostConnectionWatchdog.Arm(SelfId());
+ Proxy->Metrics->SetConnected(0);
+ LOG_INFO(*TlsActivationContext, NActorsServices::INTERCONNECT_STATUS, "[%u] disconnected", Proxy->PeerNodeId);
+ }
+ }
+
+ void TInterconnectSessionTCP::ReestablishConnectionExecute() {
+ bool startHandshakeOnSessionClose = std::exchange(StartHandshakeOnSessionClose, false);
+ TEvHandshakeDone::TPtr ev = std::move(PendingHandshakeDoneEvent);
+
+ if (startHandshakeOnSessionClose) {
+ StartHandshake();
+ } else if (ev) {
+ SetNewConnection(ev);
+ }
+ }
+
+ void TInterconnectSessionTCP::Handle(TEvPollerReady::TPtr& ev) {
+ LOG_DEBUG_IC_SESSION("ICS29", "HandleReadyWrite WriteBlockedByFullSendBuffer# %s",
+ ReceiveContext->WriteBlockedByFullSendBuffer ? "true" : "false");
+ if (std::exchange(ReceiveContext->WriteBlockedByFullSendBuffer, false)) {
+ Proxy->Metrics->IncUsefulWriteWakeups();
+ ui64 nowCycles = GetCycleCountFast();
+ double blockedUs = NHPTimer::GetSeconds(nowCycles - WriteBlockedCycles) * 1000000.0;
+ LWPROBE(ReadyWrite, Proxy->PeerNodeId, NHPTimer::GetSeconds(nowCycles - ev->SendTime) * 1000.0, blockedUs / 1000.0);
+ WriteBlockedTotal += TDuration::MicroSeconds(blockedUs);
+ GenerateTraffic();
+ } else if (!ev->Cookie) {
+ Proxy->Metrics->IncSpuriousWriteWakeups();
+ }
+ if (Params.Encryption && ReceiveContext->ReadPending && !ev->Cookie) {
+ Send(ReceiverId, ev->Release().Release(), 0, 1);
+ }
+ }
+
+ void TInterconnectSessionTCP::Handle(TEvPollerRegisterResult::TPtr ev) {
+ PollerToken = std::move(ev->Get()->PollerToken);
+ if (ReceiveContext->WriteBlockedByFullSendBuffer) {
+ if (Params.Encryption) {
+ auto *secure = static_cast<NInterconnect::TSecureSocket*>(Socket.Get());
+ PollerToken->Request(secure->WantRead(), secure->WantWrite());
+ } else {
+ PollerToken->Request(false, true);
+ }
+ }
+ }
+
+ void TInterconnectSessionTCP::WriteData() {
+ ui64 written = 0;
+
+ Y_VERIFY(Socket); // ensure that socket wasn't closed
+
+ LWPROBE_IF_TOO_LONG(SlowICWriteData, Proxy->PeerNodeId, ms) {
+ constexpr ui32 iovLimit = 256;
+#ifdef _linux_
+ ui32 maxElementsInIOV = Min<ui32>(iovLimit, sysconf(_SC_IOV_MAX));
+#else
+ ui32 maxElementsInIOV = 64;
+#endif
+ if (Params.Encryption) {
+ maxElementsInIOV = 1;
+ }
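+            // under encryption all writes go through the TLS layer one buffer at a time, so we effectively fall
+            // back to a single buffer per send syscall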
+
+ // vector of write buffers with preallocated stack space
+ TStackVec<TConstIoVec, iovLimit> wbuffers;
+
+ LOG_DEBUG_IC_SESSION("ICS30", "WriteData WriteBlockedByFullSendBuffer# %s SendQueue.size# %zu",
+ ReceiveContext->WriteBlockedByFullSendBuffer ? "true" : "false", SendQueue.size());
+
+ // update last confirmed packet number if it has changed
+ if (SendQueuePos != SendQueue.end()) {
+ SendQueuePos->UpdateConfirmIfPossible(ReceiveContext->GetLastProcessedPacketSerial());
+ }
+
+ while (SendQueuePos != SendQueue.end() && !ReceiveContext->WriteBlockedByFullSendBuffer) {
+ for (auto it = SendQueuePos; it != SendQueue.end() && wbuffers.size() < maxElementsInIOV; ++it) {
+ it->AppendToIoVector(wbuffers, maxElementsInIOV);
+ }
+
+ const struct iovec* iovec = reinterpret_cast<const struct iovec*>(wbuffers.data());
+ int iovcnt = wbuffers.size();
+
+ Y_VERIFY(iovcnt > 0);
+ Y_VERIFY(iovec->iov_len > 0);
+
+ TString err;
+ ssize_t r = 0;
+ do {
+#ifndef _win_
+ r = iovcnt == 1 ? Socket->Send(iovec[0].iov_base, iovec[0].iov_len, &err) : Socket->WriteV(iovec, iovcnt);
+#else
+ r = Socket->Send(iovec[0].iov_base, iovec[0].iov_len, &err);
+#endif
+ Proxy->Metrics->IncSendSyscalls();
+ } while (r == -EINTR);
+
+ LOG_DEBUG_IC_SESSION("ICS16", "written# %zd iovcnt# %d err# %s", r, iovcnt, err.data());
+
+ wbuffers.clear();
+
+ if (r > 0) {
+ Y_VERIFY(static_cast<size_t>(r) <= BytesUnwritten);
+ BytesUnwritten -= r;
+ written += r;
+ ui64 packets = 0;
+
+ // advance SendQueuePos to eat all processed items
+ for (size_t amount = r; amount && SendQueuePos->DropBufs(amount); ++SendQueuePos) {
+ if (!SendQueuePos->IsEmpty()) {
+ LastSentSerial = Max(LastSentSerial, SendQueuePos->GetSerial());
+ }
+ ++PacketsWrittenToSocket;
+ ++packets;
+ LWTRACK(PacketWrittenToSocket, SendQueuePos->Orbit, Proxy->PeerNodeId, PacketsWrittenToSocket, SendQueuePos->TriedWriting, SendQueuePos->GetDataSize(), BytesUnwritten, GetWriteBlockedTotal(), (SOCKET)*Socket);
+ }
+
+ LWPROBE(WriteToSocket, Proxy->PeerNodeId, r, packets, PacketsWrittenToSocket, BytesUnwritten, GetWriteBlockedTotal(), (SOCKET)*Socket);
+ } else if (-r != EAGAIN && -r != EWOULDBLOCK) {
+ const TString message = r == 0 ? "connection closed by peer"
+ : err ? err
+ : Sprintf("socket: %s", strerror(-r));
+ LOG_NOTICE_NET(Proxy->PeerNodeId, "%s", message.data());
+ if (written) {
+ Proxy->Metrics->AddTotalBytesWritten(written);
+ }
+ return ReestablishConnectionWithHandshake(r == 0 ? TDisconnectReason::EndOfStream() : TDisconnectReason::FromErrno(-r));
+ } else {
+ // we have to do some hack for secure socket -- mark the packet as 'tried writing'
+ if (Params.Encryption) {
+ Y_VERIFY(SendQueuePos != SendQueue.end());
+ SendQueuePos->MarkTriedWriting(); // do not try to replace buffer under SSL
+ }
+
+                    // we have received the EAGAIN error code, which means we can't issue more data until the poller
+                    // signals write readiness; set the corresponding flag and wait for that event
+ Y_VERIFY(!ReceiveContext->WriteBlockedByFullSendBuffer);
+ ReceiveContext->WriteBlockedByFullSendBuffer = true;
+ WriteBlockedCycles = GetCycleCountFast();
+ LWPROBE(BlockedWrite, Proxy->PeerNodeId, SendQueue.size(), written);
+ LOG_DEBUG_IC_SESSION("ICS18", "hit send buffer limit");
+
+ if (PollerToken) {
+ if (Params.Encryption) {
+ auto *secure = static_cast<NInterconnect::TSecureSocket*>(Socket.Get());
+ PollerToken->Request(secure->WantRead(), secure->WantWrite());
+ } else {
+ PollerToken->Request(false, true);
+ }
+ }
+ }
+ }
+ }
+ if (written) {
+ Proxy->Metrics->AddTotalBytesWritten(written);
+ }
+ }
+
+ void TInterconnectSessionTCP::SetForcePacketTimestamp(TDuration period) {
+ if (period != TDuration::Max()) {
+ const TInstant when = TActivationContext::Now() + period;
+ if (when < ForcePacketTimestamp) {
+ ForcePacketTimestamp = when;
+ ScheduleFlush();
+ }
+ }
+ }
+
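+    // Flush scheduling: FlushSchedule is a min-heap of deadlines for already scheduled TEvFlush events; we only
+    // schedule a new one when no pending flush would fire at or before the desired deadline.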
+ void TInterconnectSessionTCP::ScheduleFlush() {
+ if (FlushSchedule.empty() || ForcePacketTimestamp < FlushSchedule.top()) {
+ Schedule(ForcePacketTimestamp - TActivationContext::Now(), new TEvFlush);
+ FlushSchedule.push(ForcePacketTimestamp);
+ MaxFlushSchedule = Max(MaxFlushSchedule, FlushSchedule.size());
+ ++FlushEventsScheduled;
+ }
+ }
+
+ void TInterconnectSessionTCP::HandleFlush() {
+ const TInstant now = TActivationContext::Now();
+ while (FlushSchedule && now >= FlushSchedule.top()) {
+ FlushSchedule.pop();
+ }
+ IssuePingRequest();
+ if (Socket) {
+ if (now >= ForcePacketTimestamp) {
+ ++ConfirmPacketsForcedByTimeout;
+ ++FlushEventsProcessed;
+ MakePacket(false); // just generate confirmation packet if we have preconditions for this
+ } else if (ForcePacketTimestamp != TInstant::Max()) {
+ ScheduleFlush();
+ }
+ }
+ }
+
+ void TInterconnectSessionTCP::ResetFlushLogic() {
+ ForcePacketTimestamp = TInstant::Max();
+ UnconfirmedBytes = 0;
+ const TDuration ping = Proxy->Common->Settings.PingPeriod;
+ if (ping != TDuration::Zero() && !NumEventsInReadyChannels) {
+ SetForcePacketTimestamp(ping);
+ }
+ }
+
+ void TInterconnectSessionTCP::TrimSendQueueCache() {
+ static constexpr size_t maxItems = 32;
+ static constexpr size_t trimThreshold = maxItems * 2;
+ if (SendQueueCache.size() >= trimThreshold) {
+ auto it = SendQueueCache.end();
+ for (size_t n = SendQueueCache.size() - maxItems; n; --n) {
+ --it;
+ }
+
+ auto ev = std::make_unique<TEvFreeItems>();
+ ev->Items.splice(ev->Items.end(), SendQueueCache, it, SendQueueCache.end());
+ ev->NumBytes = ev->Items.size() * sizeof(TTcpPacketOutTask);
+ if (ev->GetInLineForDestruction(Proxy->Common)) {
+ Send(Proxy->Common->DestructorId, ev.release());
+ }
+ }
+ }
+
+ ui64 TInterconnectSessionTCP::MakePacket(bool data, TMaybe<ui64> pingMask) {
+ Y_VERIFY(Socket);
+
+ TSendQueue::iterator packet;
+ if (SendQueueCache) {
+ // we have entries in cache, take one and move it to the end of SendQueue
+ packet = SendQueueCache.begin();
+ SendQueue.splice(SendQueue.end(), SendQueueCache, packet);
+ packet->Reuse(); // reset packet to initial state
+ } else {
+ // we have to allocate new packet, so just do it
+ LWPROBE_IF_TOO_LONG(SlowICAllocPacketBuffer, Proxy->PeerNodeId, ms) {
+ packet = SendQueue.emplace(SendQueue.end(), Params);
+ }
+ }
+
+ // update send queue position
+ if (SendQueuePos == SendQueue.end()) {
+ SendQueuePos = packet; // start sending this packet if we are not sending anything for now
+ }
+
+ ui64 serial = 0;
+
+ if (data) {
+ // generate serial for this data packet
+ serial = ++OutputCounter;
+
+ // fill the data packet
+ Y_VERIFY(NumEventsInReadyChannels);
+ LWPROBE_IF_TOO_LONG(SlowICFillSendingBuffer, Proxy->PeerNodeId, ms) {
+ FillSendingBuffer(*packet, serial);
+ }
+ Y_VERIFY(!packet->IsEmpty());
+
+ InflightDataAmount += packet->GetDataSize();
+ Proxy->Metrics->AddInflightDataAmount(packet->GetDataSize());
+ if (InflightDataAmount > GetTotalInflightAmountOfData()) {
+ Proxy->Metrics->IncInflyLimitReach();
+ }
+
+ if (AtomicGet(ReceiveContext->ControlPacketId) == 0) {
+ AtomicSet(ReceiveContext->ControlPacketSendTimer, GetCycleCountFast());
+ AtomicSet(ReceiveContext->ControlPacketId, OutputCounter);
+ }
+
+ // update payload activity timer
+ LastPayloadActivityTimestamp = TActivationContext::Now();
+ } else if (pingMask) {
+ serial = *pingMask;
+
+ // make this packet a priority one
+ if (SendQueuePos != packet) {
+ Y_VERIFY(SendQueuePos != SendQueue.end());
+ if (SendQueuePos->IsAtBegin()) {
+ // insert this packet just before the next being sent and step back
+ SendQueue.splice(SendQueuePos, SendQueue, packet);
+ --SendQueuePos;
+ Y_VERIFY(SendQueuePos == packet);
+ } else {
+ // current packet is already being sent, so move new packet just after it
+ SendQueue.splice(std::next(SendQueuePos), SendQueue, packet);
+ }
+ }
+ }
+
+ const ui64 lastInputSerial = ReceiveContext->GetLastProcessedPacketSerial();
+ packet->SetMetadata(serial, lastInputSerial);
+ packet->Sign();
+
+ // count number of bytes pending for write
+ ui64 packetSize = (Params.UseModernFrame ? sizeof(TTcpPacketHeader_v2) : sizeof(TTcpPacketHeader_v1)) + packet->GetDataSize();
+ BytesUnwritten += packetSize;
+
+ LOG_DEBUG_IC_SESSION("ICS22", "outgoing packet Serial# %" PRIu64 " Confirm# %" PRIu64 " DataSize# %zu"
+ " InflightDataAmount# %" PRIu64 " BytesUnwritten# %" PRIu64, serial, lastInputSerial, packet->GetDataSize(),
+ InflightDataAmount, BytesUnwritten);
+
+ // reset forced packet sending timestamp as we have confirmed all received data
+ ResetFlushLogic();
+
+ ++PacketsGenerated;
+ LWTRACK(PacketGenerated, packet->Orbit, Proxy->PeerNodeId, BytesUnwritten, InflightDataAmount, PacketsGenerated, packetSize);
+
+ if (!data) {
+ WriteData();
+ }
+
+ return packetSize;
+ }
+
+ bool TInterconnectSessionTCP::DropConfirmed(ui64 confirm) {
+ LOG_DEBUG_IC_SESSION("ICS23", "confirm count: %" PRIu64, confirm);
+
+ Y_VERIFY(LastConfirmed <= confirm && confirm <= LastSentSerial && LastSentSerial <= OutputCounter,
+ "%s confirm# %" PRIu64 " LastConfirmed# %" PRIu64 " OutputCounter# %" PRIu64 " LastSentSerial# %" PRIu64,
+ LogPrefix.data(), confirm, LastConfirmed, OutputCounter, LastSentSerial);
+ LastConfirmed = confirm;
+
+ ui64 droppedDataAmount = 0;
+ ui32 numDropped = 0;
+
+ // drop confirmed packets; this also includes any auxiliary packets as their serial is set to zero, effectively
+ // making Serial <= confirm true
+ TSendQueue::iterator it;
+ ui64 lastDroppedSerial = 0;
+ for (it = SendQueue.begin(); it != SendQueuePos && it->Confirmed(confirm); ++it) {
+ if (!it->IsEmpty()) {
+ lastDroppedSerial = it->GetSerial();
+ }
+ droppedDataAmount += it->GetDataSize();
+ ++numDropped;
+ }
+ SendQueueCache.splice(SendQueueCache.begin(), SendQueue, SendQueue.begin(), it);
+ TrimSendQueueCache();
+ ChannelScheduler->ForEach([&](TEventOutputChannel& channel) {
+ channel.DropConfirmed(lastDroppedSerial);
+ });
+
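+        // the drop unblocks traffic generation if the in-flight amount was at or above the limit before the drop
+        // and falls below it afterwards (i.e. current - droppedDataAmount < limit)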
+ const ui64 current = InflightDataAmount;
+ const ui64 limit = GetTotalInflightAmountOfData();
+ const bool unblockedSomething = current >= limit && current < limit + droppedDataAmount;
+
+ PacketsConfirmed += numDropped;
+ InflightDataAmount -= droppedDataAmount;
+ Proxy->Metrics->SubInflightDataAmount(droppedDataAmount);
+ LWPROBE(DropConfirmed, Proxy->PeerNodeId, droppedDataAmount, InflightDataAmount);
+
+ LOG_DEBUG_IC_SESSION("ICS24", "exit InflightDataAmount: %" PRIu64 " bytes droppedDataAmount: %" PRIu64 " bytes"
+ " dropped %" PRIu32 " packets", InflightDataAmount, droppedDataAmount, numDropped);
+
+ Pool->Trim(); // send any unsent free requests
+
+ return unblockedSomething;
+ }
+
+ void TInterconnectSessionTCP::FillSendingBuffer(TTcpPacketOutTask& task, ui64 serial) {
+ ui32 bytesGenerated = 0;
+
+ Y_VERIFY(NumEventsInReadyChannels);
+ while (NumEventsInReadyChannels) {
+ TEventOutputChannel *channel = ChannelScheduler->PickChannelWithLeastConsumedWeight();
+ Y_VERIFY_DEBUG(!channel->IsEmpty());
+
+ // generate some data within this channel
+ const ui64 netBefore = channel->GetBufferedAmountOfData();
+ ui64 gross = 0;
+ const bool eventDone = channel->FeedBuf(task, serial, &gross);
+ channel->UnaccountedTraffic += gross;
+ const ui64 netAfter = channel->GetBufferedAmountOfData();
+ Y_VERIFY_DEBUG(netAfter <= netBefore); // net amount should shrink
+ const ui64 net = netBefore - netAfter; // number of net bytes serialized
+
+ // adjust metrics for local and global queue size
+ TotalOutputQueueSize -= net;
+ Proxy->Metrics->SubOutputBuffersTotalSize(net);
+ bytesGenerated += gross;
+ Y_VERIFY_DEBUG(!!net == !!gross && gross >= net, "net# %" PRIu64 " gross# %" PRIu64, net, gross);
+
+ // return it back to queue or delete, depending on whether this channel is still working or not
+ ChannelScheduler->FinishPick(gross, EqualizeCounter);
+
+ // update some stats if the packet was fully serialized
+ if (eventDone) {
+ ++MessagesWrittenToBuffer;
+
+ Y_VERIFY(NumEventsInReadyChannels);
+ --NumEventsInReadyChannels;
+
+ if (!NumEventsInReadyChannels) {
+ SetOutputStuckFlag(false);
+ }
+ }
+
+ if (!gross) { // no progress -- almost full packet buffer
+ break;
+ }
+ }
+
+ LWTRACK(FillSendingBuffer, task.Orbit, Proxy->PeerNodeId, bytesGenerated, NumEventsInReadyChannels, WriteBlockedTotal);
+ Y_VERIFY(bytesGenerated); // ensure we are not stalled in serialization
+ }
+
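+    // returns the share of time the output queue was stuck over the recent utilization window, scaled to
+    // parts-per-million (compared against the 7/8 and 1/2 thresholds in SendUpdateToWhiteboard)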
+ ui32 TInterconnectSessionTCP::CalculateQueueUtilization() {
+ SwitchStuckPeriod();
+ ui64 sumBusy = 0, sumPeriod = 0;
+ for (auto iter = OutputQueueUtilization.begin(); iter != OutputQueueUtilization.end() - 1; ++iter) {
+ sumBusy += iter->first;
+ sumPeriod += iter->second;
+ }
+ return sumBusy * 1000000 / sumPeriod;
+ }
+
+ void TInterconnectSessionTCP::SendUpdateToWhiteboard(bool connected) {
+ const ui32 utilization = Socket ? CalculateQueueUtilization() : 0;
+
+ if (const auto& callback = Proxy->Common->UpdateWhiteboard) {
+ enum class EFlag {
+ GREEN,
+ YELLOW,
+ ORANGE,
+ RED,
+ };
+ EFlag flagState = EFlag::RED;
+
+ if (Socket) {
+ flagState = EFlag::GREEN;
+
+ do {
+ auto lastInputDelay = TActivationContext::Now() - LastInputActivityTimestamp;
+ if (lastInputDelay * 4 >= GetDeadPeerTimeout() * 3) {
+ flagState = EFlag::ORANGE;
+ break;
+ } else if (lastInputDelay * 2 >= GetDeadPeerTimeout()) {
+ flagState = EFlag::YELLOW;
+ }
+
+ // check utilization
+ if (utilization > 875000) { // 7/8
+ flagState = EFlag::ORANGE;
+ break;
+ } else if (utilization > 500000) { // 1/2
+ flagState = EFlag::YELLOW;
+ }
+ } while (false);
+ }
+
+ callback(Proxy->Metrics->GetHumanFriendlyPeerHostName(),
+ connected,
+ flagState == EFlag::GREEN,
+ flagState == EFlag::YELLOW,
+ flagState == EFlag::ORANGE,
+ flagState == EFlag::RED,
+ TlsActivationContext->ExecutorThread.ActorSystem);
+ }
+
+ if (connected) {
+ Schedule(TDuration::Seconds(1), new TEvents::TEvWakeup);
+ }
+ }
+
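+    // Stuck-time accounting: each window pair holds (busy cycles, window length). While the output queue is stuck,
+    // the first component temporarily holds "accumulated busy cycles minus entry timestamp"; adding the current
+    // timestamp back on exit (or on window switch) turns it into the total busy cycles for that window.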
+ void TInterconnectSessionTCP::SetOutputStuckFlag(bool state) {
+ if (OutputStuckFlag == state)
+ return;
+
+ if (OutputQueueUtilization.Size() == 0)
+ return;
+
+ auto& lastpair = OutputQueueUtilization.Last();
+ if (state)
+ lastpair.first -= GetCycleCountFast();
+ else
+ lastpair.first += GetCycleCountFast();
+
+ OutputStuckFlag = state;
+ }
+
+ void TInterconnectSessionTCP::SwitchStuckPeriod() {
+ auto now = GetCycleCountFast();
+ if (OutputQueueUtilization.Size() != 0) {
+ auto& lastpair = OutputQueueUtilization.Last();
+ lastpair.second = now - lastpair.second;
+ if (OutputStuckFlag)
+ lastpair.first += now;
+ }
+
+ OutputQueueUtilization.Push(std::pair<ui64, ui64>(0, now));
+ if (OutputStuckFlag)
+ OutputQueueUtilization.Last().first -= now;
+ }
+
+ TDuration TInterconnectSessionTCP::GetDeadPeerTimeout() const {
+ return Coalesce(Proxy->Common->Settings.DeadPeer, DEFAULT_DEADPEER_TIMEOUT);
+ }
+
+ TDuration TInterconnectSessionTCP::GetCloseOnIdleTimeout() const {
+ return Proxy->Common->Settings.CloseOnIdle;
+ }
+
+ TDuration TInterconnectSessionTCP::GetLostConnectionTimeout() const {
+ return Coalesce(Proxy->Common->Settings.LostConnection, DEFAULT_LOST_CONNECTION_TIMEOUT);
+ }
+
+ ui32 TInterconnectSessionTCP::GetTotalInflightAmountOfData() const {
+ return Coalesce(Proxy->Common->Settings.TotalInflightAmountOfData, DEFAULT_TOTAL_INFLIGHT_DATA);
+ }
+
+ ui64 TInterconnectSessionTCP::GetMaxCyclesPerEvent() const {
+ return DurationToCycles(TDuration::MicroSeconds(50));
+ }
+
+ void TInterconnectSessionTCP::IssuePingRequest() {
+ const TInstant now = TActivationContext::Now();
+ if (now >= LastPingTimestamp + PingPeriodicity) {
+ LOG_DEBUG_IC_SESSION("ICS22", "Issuing ping request");
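+            // ping and clock payloads travel in the serial field of auxiliary (non-data) packets, tagged with the
+            // corresponding mask bits so the peer can tell them apart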
+ if (Socket) {
+ MakePacket(false, GetCycleCountFast() | TTcpPacketBuf::PingRequestMask);
+ }
+ if (Socket) {
+ MakePacket(false, TInstant::Now().MicroSeconds() | TTcpPacketBuf::ClockMask);
+ }
+ LastPingTimestamp = now;
+ }
+ }
+
+ void TInterconnectSessionTCP::Handle(TEvProcessPingRequest::TPtr ev) {
+ if (Socket) {
+ MakePacket(false, ev->Get()->Payload | TTcpPacketBuf::PingResponseMask);
+ }
+ }
+
+ void TInterconnectSessionTCP::GenerateHttpInfo(TStringStream& str) {
+ HTML(str) {
+ DIV_CLASS("panel panel-info") {
+ DIV_CLASS("panel-heading") {
+ str << "Session";
+ }
+ DIV_CLASS("panel-body") {
+ TABLE_CLASS("table") {
+ TABLEHEAD() {
+ TABLER() {
+ TABLEH() {
+ str << "Sensor";
+ }
+ TABLEH() {
+ str << "Value";
+ }
+ }
+ }
+ TABLEBODY() {
+ TABLER() {
+ TABLED() {
+ str << "Encryption";
+ }
+ TABLED() {
+ str << (Params.Encryption ? "<font color=green>Enabled</font>" : "<font color=red>Disabled</font>");
+ }
+ }
+ if (auto *x = dynamic_cast<NInterconnect::TSecureSocket*>(Socket.Get())) {
+ TABLER() {
+ TABLED() {
+ str << "Cipher name";
+ }
+ TABLED() {
+ str << x->GetCipherName();
+ }
+ }
+ TABLER() {
+ TABLED() {
+ str << "Cipher bits";
+ }
+ TABLED() {
+ str << x->GetCipherBits();
+ }
+ }
+ TABLER() {
+ TABLED() {
+ str << "Protocol";
+ }
+ TABLED() {
+ str << x->GetProtocolName();
+ }
+ }
+ TABLER() {
+ TABLED() {
+ str << "Peer CN";
+ }
+ TABLED() {
+ str << x->GetPeerCommonName();
+ }
+ }
+ }
+ TABLER() {
+ TABLED() { str << "AuthOnly CN"; }
+ TABLED() { str << Params.AuthCN; }
+ }
+ TABLER() {
+ TABLED() {
+ str << "Local scope id";
+ }
+ TABLED() {
+ str << ScopeIdToString(Proxy->Common->LocalScopeId);
+ }
+ }
+ TABLER() {
+ TABLED() {
+ str << "Peer scope id";
+ }
+ TABLED() {
+ str << ScopeIdToString(Params.PeerScopeId);
+ }
+ }
+ TABLER() {
+ TABLED() {
+ str << "This page generated at";
+ }
+ TABLED() {
+ str << TActivationContext::Now() << " / " << Now();
+ }
+ }
+ TABLER() {
+ TABLED() {
+ str << "SelfID";
+ }
+ TABLED() {
+ str << SelfId().ToString();
+ }
+ }
+ TABLER() {
+ TABLED() { str << "Frame version/Checksum"; }
+ TABLED() { str << (!Params.UseModernFrame ? "v1/crc32c" : Params.Encryption ? "v2/none" : "v2/crc32c"); }
+ }
+#define MON_VAR(NAME) \
+ TABLER() { \
+ TABLED() { \
+ str << #NAME; \
+ } \
+ TABLED() { \
+ str << NAME; \
+ } \
+ }
+
+ MON_VAR(Created)
+ MON_VAR(NewConnectionSet)
+ MON_VAR(ReceiverId)
+ MON_VAR(MessagesGot)
+ MON_VAR(MessagesWrittenToBuffer)
+ MON_VAR(PacketsGenerated)
+ MON_VAR(PacketsWrittenToSocket)
+ MON_VAR(PacketsConfirmed)
+ MON_VAR(AtomicGet(ReceiveContext->PacketsReadFromSocket))
+ MON_VAR(ConfirmPacketsForcedBySize)
+ MON_VAR(ConfirmPacketsForcedByTimeout)
+
+ TABLER() {
+ TABLED() {
+ str << "Virtual self ID";
+ }
+ TABLED() {
+ str << Proxy->SessionVirtualId.ToString();
+ }
+ }
+ TABLER() {
+ TABLED() {
+ str << "Virtual peer ID";
+ }
+ TABLED() {
+ str << Proxy->RemoteSessionVirtualId.ToString();
+ }
+ }
+ TABLER() {
+ TABLED() {
+ str << "Socket";
+ }
+ TABLED() {
+ str << (Socket ? i64(*Socket) : -1);
+ }
+ }
+
+ ui32 unsentQueueSize = Socket ? Socket->GetUnsentQueueSize() : 0;
+
+ MON_VAR(OutputStuckFlag)
+ MON_VAR(SendQueue.size())
+ MON_VAR(SendQueueCache.size())
+ MON_VAR(NumEventsInReadyChannels)
+ MON_VAR(TotalOutputQueueSize)
+ MON_VAR(BytesUnwritten)
+ MON_VAR(InflightDataAmount)
+ MON_VAR(unsentQueueSize)
+ MON_VAR(SendBufferSize)
+ MON_VAR(LastInputActivityTimestamp)
+ MON_VAR(LastPayloadActivityTimestamp)
+ MON_VAR(LastHandshakeDone)
+ MON_VAR(OutputCounter)
+ MON_VAR(LastSentSerial)
+ MON_VAR(ReceiveContext->GetLastProcessedPacketSerial())
+ MON_VAR(LastConfirmed)
+ MON_VAR(FlushSchedule.size())
+ MON_VAR(MaxFlushSchedule)
+ MON_VAR(FlushEventsScheduled)
+ MON_VAR(FlushEventsProcessed)
+
+ TString clockSkew;
+ i64 x = GetClockSkew();
+ if (x < 0) {
+ clockSkew = Sprintf("-%s", TDuration::MicroSeconds(-x).ToString().data());
+ } else {
+ clockSkew = Sprintf("+%s", TDuration::MicroSeconds(x).ToString().data());
+ }
+
+ MON_VAR(LastPingTimestamp)
+ MON_VAR(GetPingRTT())
+ MON_VAR(clockSkew)
+
+ MON_VAR(GetDeadPeerTimeout())
+ MON_VAR(GetTotalInflightAmountOfData())
+ MON_VAR(GetCloseOnIdleTimeout())
+ MON_VAR(Subscribers.size())
+ }
+ }
+ }
+ }
+ }
+ }
+
+ void CreateSessionKillingActor(TInterconnectProxyCommon::TPtr common) {
+ TlsActivationContext->ExecutorThread.ActorSystem->Register(new TInterconnectSessionKiller(common));
+ }
+}
diff --git a/library/cpp/actors/interconnect/interconnect_tcp_session.h b/library/cpp/actors/interconnect/interconnect_tcp_session.h
new file mode 100644
index 0000000000..7fc00dbcc5
--- /dev/null
+++ b/library/cpp/actors/interconnect/interconnect_tcp_session.h
@@ -0,0 +1,565 @@
+#pragma once
+
+#include <library/cpp/actors/core/hfunc.h>
+#include <library/cpp/actors/core/event_pb.h>
+#include <library/cpp/actors/core/events.h>
+#include <library/cpp/actors/core/log.h>
+#include <library/cpp/actors/helpers/mon_histogram_helper.h>
+#include <library/cpp/actors/protos/services_common.pb.h>
+#include <library/cpp/actors/util/datetime.h>
+#include <library/cpp/actors/util/rope.h>
+#include <library/cpp/actors/util/funnel_queue.h>
+#include <library/cpp/actors/util/recentwnd.h>
+#include <library/cpp/monlib/dynamic_counters/counters.h>
+#include <library/cpp/actors/core/actor_bootstrapped.h>
+
+#include <util/generic/queue.h>
+#include <util/generic/deque.h>
+#include <util/datetime/cputimer.h>
+
+#include "interconnect_impl.h"
+#include "poller_tcp.h"
+#include "poller_actor.h"
+#include "interconnect_channel.h"
+#include "logging.h"
+#include "watchdog_timer.h"
+#include "event_holder_pool.h"
+#include "channel_scheduler.h"
+
+#include <unordered_set>
+#include <unordered_map>
+
+namespace NActors {
+ class TSlowPathChecker {
+ using TTraceCallback = std::function<void(double)>;
+ TTraceCallback Callback;
+ const NHPTimer::STime Start;
+
+ public:
+ TSlowPathChecker(TTraceCallback&& callback)
+ : Callback(std::move(callback))
+ , Start(GetCycleCountFast())
+ {
+ }
+
+ ~TSlowPathChecker() {
+ const NHPTimer::STime end = GetCycleCountFast();
+ const NHPTimer::STime elapsed = end - Start;
+ if (elapsed > 1000000) {
+ Callback(NHPTimer::GetSeconds(elapsed) * 1000);
+ }
+ }
+
+ operator bool() const {
+ return false;
+ }
+ };
+
+#define LWPROBE_IF_TOO_LONG(...) \
+ if (auto __x = TSlowPathChecker{[&](double ms) { LWPROBE(__VA_ARGS__); }}) \
+ ; \
+ else
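+
+    // Usage sketch: TSlowPathChecker converts to false, so the guarded block runs in the else-branch, and the
+    // probe fires from its destructor only when the block took longer than ~10^6 cycles:
+    //
+    //   LWPROBE_IF_TOO_LONG(SlowICWriteData, peerNodeId, ms) {
+    //       DoSlowWrite(); // hypothetical slow operation
+    //   }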
+
+ class TTimeLimit {
+ public:
+ TTimeLimit(ui64 limitInCycles)
+ : UpperLimit(limitInCycles == 0 ? 0 : GetCycleCountFast() + limitInCycles)
+ {
+ }
+
+ TTimeLimit(ui64 startTS, ui64 limitInCycles)
+ : UpperLimit(limitInCycles == 0 ? 0 : startTS + limitInCycles)
+ {
+ }
+
+ bool CheckExceeded() {
+ return UpperLimit != 0 && GetCycleCountFast() > UpperLimit;
+ }
+
+ const ui64 UpperLimit;
+ };
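+
+    // Usage sketch (hypothetical helpers, for illustration): bound the work done per event by a cycle budget:
+    //
+    //   TTimeLimit limit(GetMaxCyclesPerEvent());
+    //   while (HasMoreWork() && !limit.CheckExceeded()) {
+    //       ProcessOneItem();
+    //   }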
+
+ static constexpr TDuration DEFAULT_DEADPEER_TIMEOUT = TDuration::Seconds(10);
+ static constexpr TDuration DEFAULT_LOST_CONNECTION_TIMEOUT = TDuration::Seconds(10);
+ static constexpr ui32 DEFAULT_MAX_INFLIGHT_DATA = 10240 * 1024;
+ static constexpr ui32 DEFAULT_TOTAL_INFLIGHT_DATA = 4 * 10240 * 1024;
+
+ class TInterconnectProxyTCP;
+
+ enum class EUpdateState : ui8 {
+ NONE, // no updates generated by input session yet
+ INFLIGHT, // one update is inflight, and no more pending
+ INFLIGHT_AND_PENDING, // one update is inflight, and one is pending
+ CONFIRMING, // confirmation inflight
+ };
+
+ struct TReceiveContext: public TAtomicRefCount<TReceiveContext> {
+        /* All accesses to these fields must be thread-safe */
+
+ ui64 ControlPacketSendTimer = 0;
+ ui64 ControlPacketId = 0;
+
+ // number of packets received by input session
+ TAtomic PacketsReadFromSocket = 0;
+ TAtomic DataPacketsReadFromSocket = 0;
+
+ // last processed packet by input session
+ std::atomic_uint64_t LastProcessedPacketSerial = 0;
+ static constexpr uint64_t LastProcessedPacketSerialLockBit = uint64_t(1) << 63;
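+        // the topmost bit doubles as a lock: while it is set, AdvanceLastProcessedPacketSerial makes no progress,
+        // freezing the confirmed serial (this is used for the duration of a handshake)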
+
+ // for hardened checks
+ TAtomic NumInputSessions = 0;
+
+ NHPTimer::STime StartTime;
+
+ std::atomic<ui64> PingRTT_us = 0;
+ std::atomic<i64> ClockSkew_us = 0;
+
+ std::atomic<EUpdateState> UpdateState;
+ static_assert(std::atomic<EUpdateState>::is_always_lock_free);
+
+ bool WriteBlockedByFullSendBuffer = false;
+ bool ReadPending = false;
+
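+        // per-channel buffers for partially received event data: channels 0-15 use the array fast path, higher
+        // channel ids fall back to the map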
+ std::array<TRope, 16> ChannelArray;
+ std::unordered_map<ui16, TRope> ChannelMap;
+
+ TReceiveContext() {
+ GetTimeFast(&StartTime);
+ }
+
+        // returns false if the session needs to be terminated and the packet must not be processed
+ bool AdvanceLastProcessedPacketSerial() {
+ for (;;) {
+ uint64_t value = LastProcessedPacketSerial.load();
+ if (value & LastProcessedPacketSerialLockBit) {
+ return false;
+ }
+ if (LastProcessedPacketSerial.compare_exchange_weak(value, value + 1)) {
+ return true;
+ }
+ }
+ }
+
+ ui64 LockLastProcessedPacketSerial() {
+ for (;;) {
+ uint64_t value = LastProcessedPacketSerial.load();
+ if (value & LastProcessedPacketSerialLockBit) {
+ return value & ~LastProcessedPacketSerialLockBit;
+ }
+ if (LastProcessedPacketSerial.compare_exchange_strong(value, value | LastProcessedPacketSerialLockBit)) {
+ return value;
+ }
+ }
+ }
+
+ void UnlockLastProcessedPacketSerial() {
+ LastProcessedPacketSerial = LastProcessedPacketSerial.load() & ~LastProcessedPacketSerialLockBit;
+ }
+
+ ui64 GetLastProcessedPacketSerial() {
+ return LastProcessedPacketSerial.load() & ~LastProcessedPacketSerialLockBit;
+ }
+ };
+
+ class TInputSessionTCP
+ : public TActorBootstrapped<TInputSessionTCP>
+ , public TInterconnectLoggingBase
+ {
+ enum {
+ EvCheckDeadPeer = EventSpaceBegin(TEvents::ES_PRIVATE),
+ EvResumeReceiveData,
+ };
+
+ struct TEvCheckDeadPeer : TEventLocal<TEvCheckDeadPeer, EvCheckDeadPeer> {};
+ struct TEvResumeReceiveData : TEventLocal<TEvResumeReceiveData, EvResumeReceiveData> {};
+
+ public:
+ static constexpr EActivityType ActorActivityType() {
+ return INTERCONNECT_SESSION_TCP;
+ }
+
+ TInputSessionTCP(const TActorId& sessionId,
+ TIntrusivePtr<NInterconnect::TStreamSocket> socket,
+ TIntrusivePtr<TReceiveContext> context,
+ TInterconnectProxyCommon::TPtr common,
+ std::shared_ptr<IInterconnectMetrics> metrics,
+ ui32 nodeId,
+ ui64 lastConfirmed,
+ TDuration deadPeerTimeout,
+ TSessionParams params);
+
+ private:
+ friend class TActorBootstrapped<TInputSessionTCP>;
+
+ void Bootstrap();
+
+ STRICT_STFUNC(WorkingState,
+ cFunc(TEvents::TSystem::PoisonPill, PassAway)
+ hFunc(TEvPollerReady, Handle)
+ hFunc(TEvPollerRegisterResult, Handle)
+ cFunc(EvResumeReceiveData, HandleResumeReceiveData)
+ cFunc(TEvInterconnect::TEvCloseInputSession::EventType, CloseInputSession)
+ cFunc(EvCheckDeadPeer, HandleCheckDeadPeer)
+ cFunc(TEvConfirmUpdate::EventType, HandleConfirmUpdate)
+ )
+
+ private:
+ TRope IncomingData;
+
+ const TActorId SessionId;
+ TIntrusivePtr<NInterconnect::TStreamSocket> Socket;
+ TPollerToken::TPtr PollerToken;
+ TIntrusivePtr<TReceiveContext> Context;
+ TInterconnectProxyCommon::TPtr Common;
+ const ui32 NodeId;
+ const TSessionParams Params;
+
+ // header we are currently processing (parsed from the stream)
+ union {
+ TTcpPacketHeader_v1 v1;
+ TTcpPacketHeader_v2 v2;
+ char Data[1];
+ } Header;
+ ui64 HeaderConfirm, HeaderSerial;
+
+ size_t PayloadSize;
+ ui32 ChecksumExpected, Checksum;
+ bool IgnorePayload;
+ TRope Payload;
+ enum class EState {
+ HEADER,
+ PAYLOAD,
+ };
+ EState State = EState::HEADER;
+
+ THolder<TEvUpdateFromInputSession> UpdateFromInputSession;
+
+ ui64 ConfirmedByInput;
+
+ std::shared_ptr<IInterconnectMetrics> Metrics;
+
+ bool CloseInputSessionRequested = false;
+
+ void CloseInputSession();
+
+ void Handle(TEvPollerReady::TPtr ev);
+ void Handle(TEvPollerRegisterResult::TPtr ev);
+ void HandleResumeReceiveData();
+ void HandleConfirmUpdate();
+ void ReceiveData();
+ void ProcessHeader(size_t headerLen);
+ void ProcessPayload(ui64& numDataBytes);
+ void ProcessEvent(TRope& data, TEventDescr& descr);
+ bool ReadMore();
+
+ void ReestablishConnection(TDisconnectReason reason);
+ void DestroySession(TDisconnectReason reason);
+
+ TDeque<TIntrusivePtr<TRopeAlignedBuffer>> Buffers;
+
+ static constexpr size_t NumPreallocatedBuffers = 16;
+ void PreallocateBuffers();
+
+ inline ui64 GetMaxCyclesPerEvent() const {
+ return DurationToCycles(TDuration::MicroSeconds(500));
+ }
+
+ const TDuration DeadPeerTimeout;
+ TInstant LastReceiveTimestamp;
+ void HandleCheckDeadPeer();
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // pinger logic
+
+ bool NewPingProtocol = false;
+ TDeque<TDuration> PingQ; // last N ping samples
+ TDeque<i64> SkewQ; // last N calculated clock skew samples
+
+ void HandlePingResponse(TDuration passed);
+ void HandleClock(TInstant clock);
+ };
+
+ class TInterconnectSessionTCP
+ : public TActor<TInterconnectSessionTCP>
+ , public TInterconnectLoggingBase
+ {
+ enum {
+ EvCheckCloseOnIdle = EventSpaceBegin(TEvents::ES_PRIVATE),
+ EvCheckLostConnection,
+ EvRam,
+ EvTerminate,
+ EvFreeItems,
+ };
+
+ struct TEvCheckCloseOnIdle : TEventLocal<TEvCheckCloseOnIdle, EvCheckCloseOnIdle> {};
+ struct TEvCheckLostConnection : TEventLocal<TEvCheckLostConnection, EvCheckLostConnection> {};
+
+ struct TEvRam : TEventLocal<TEvRam, EvRam> {
+ const bool Batching;
+ TEvRam(bool batching) : Batching(batching) {}
+ };
+
+ struct TEvTerminate : TEventLocal<TEvTerminate, EvTerminate> {
+ TDisconnectReason Reason;
+
+ TEvTerminate(TDisconnectReason reason)
+ : Reason(std::move(reason))
+ {}
+ };
+
+ const TInstant Created;
+ TInstant NewConnectionSet;
+ ui64 MessagesGot = 0;
+ ui64 MessagesWrittenToBuffer = 0;
+ ui64 PacketsGenerated = 0;
+ ui64 PacketsWrittenToSocket = 0;
+ ui64 PacketsConfirmed = 0;
+
+ public:
+ static constexpr EActivityType ActorActivityType() {
+ return INTERCONNECT_SESSION_TCP;
+ }
+
+ TInterconnectSessionTCP(TInterconnectProxyTCP* const proxy, TSessionParams params);
+ ~TInterconnectSessionTCP();
+
+ void Init();
+ void CloseInputSession();
+
+ static TEvTerminate* NewEvTerminate(TDisconnectReason reason) {
+ return new TEvTerminate(std::move(reason));
+ }
+
+ TDuration GetPingRTT() const {
+ return TDuration::MicroSeconds(ReceiveContext->PingRTT_us);
+ }
+
+ i64 GetClockSkew() const {
+ return ReceiveContext->ClockSkew_us;
+ }
+
+ private:
+ friend class TInterconnectProxyTCP;
+
+ void Handle(TEvTerminate::TPtr& ev);
+ void HandlePoison();
+ void Terminate(TDisconnectReason reason);
+ void PassAway() override;
+
+ void Forward(STATEFN_SIG);
+ void Subscribe(STATEFN_SIG);
+ void Unsubscribe(STATEFN_SIG);
+
+ STRICT_STFUNC(StateFunc,
+ fFunc(TEvInterconnect::EvForward, Forward)
+ cFunc(TEvents::TEvPoisonPill::EventType, HandlePoison)
+ fFunc(TEvInterconnect::TEvConnectNode::EventType, Subscribe)
+ fFunc(TEvents::TEvSubscribe::EventType, Subscribe)
+ fFunc(TEvents::TEvUnsubscribe::EventType, Unsubscribe)
+ cFunc(TEvFlush::EventType, HandleFlush)
+ hFunc(TEvPollerReady, Handle)
+ hFunc(TEvPollerRegisterResult, Handle)
+ hFunc(TEvUpdateFromInputSession, Handle)
+ hFunc(TEvRam, HandleRam)
+ hFunc(TEvCheckCloseOnIdle, CloseOnIdleWatchdog)
+ hFunc(TEvCheckLostConnection, LostConnectionWatchdog)
+ cFunc(TEvents::TSystem::Wakeup, SendUpdateToWhiteboard)
+ hFunc(TEvSocketDisconnect, OnDisconnect)
+ hFunc(TEvTerminate, Handle)
+ hFunc(TEvProcessPingRequest, Handle)
+ )
+
+ void Handle(TEvUpdateFromInputSession::TPtr& ev);
+
+ void OnDisconnect(TEvSocketDisconnect::TPtr& ev);
+
+ THolder<TEvHandshakeAck> ProcessHandshakeRequest(TEvHandshakeAsk::TPtr& ev);
+ void SetNewConnection(TEvHandshakeDone::TPtr& ev);
+
+ TEvRam* RamInQueue = nullptr;
+ ui64 RamStartedCycles = 0;
+ void HandleRam(TEvRam::TPtr& ev);
+ void GenerateTraffic();
+
+ void SendUpdateToWhiteboard(bool connected = true);
+ ui32 CalculateQueueUtilization();
+
+ void Handle(TEvPollerReady::TPtr& ev);
+ void Handle(TEvPollerRegisterResult::TPtr ev);
+ void WriteData();
+
+ ui64 MakePacket(bool data, TMaybe<ui64> pingMask = {});
+ void FillSendingBuffer(TTcpPacketOutTask& packet, ui64 serial);
+ bool DropConfirmed(ui64 confirm);
+ void ShutdownSocket(TDisconnectReason reason);
+
+ void StartHandshake();
+ void ReestablishConnection(TEvHandshakeDone::TPtr&& ev, bool startHandshakeOnSessionClose,
+ TDisconnectReason reason);
+ void ReestablishConnectionWithHandshake(TDisconnectReason reason);
+ void ReestablishConnectionExecute();
+
+ TInterconnectProxyTCP* const Proxy;
+
+ // various connection settings access
+ TDuration GetDeadPeerTimeout() const;
+ TDuration GetCloseOnIdleTimeout() const;
+ TDuration GetLostConnectionTimeout() const;
+ ui32 GetTotalInflightAmountOfData() const;
+ ui64 GetMaxCyclesPerEvent() const;
+
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // pinger
+
+ TInstant LastPingTimestamp;
+ static constexpr TDuration PingPeriodicity = TDuration::Seconds(1);
+ void IssuePingRequest();
+ void Handle(TEvProcessPingRequest::TPtr ev);
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ TInstant LastInputActivityTimestamp;
+ TInstant LastPayloadActivityTimestamp;
+ TWatchdogTimer<TEvCheckCloseOnIdle> CloseOnIdleWatchdog;
+ TWatchdogTimer<TEvCheckLostConnection> LostConnectionWatchdog;
+
+ void OnCloseOnIdleTimerHit() {
+ LOG_INFO_IC("ICS27", "CloseOnIdle timer hit, session terminated");
+ Terminate(TDisconnectReason::CloseOnIdle());
+ }
+
+ void OnLostConnectionTimerHit() {
+ LOG_ERROR_IC("ICS28", "LostConnection timer hit, session terminated");
+ Terminate(TDisconnectReason::LostConnection());
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ const TSessionParams Params;
+ TMaybe<TEventHolderPool> Pool;
+ TMaybe<TChannelScheduler> ChannelScheduler;
+ ui64 TotalOutputQueueSize;
+ bool OutputStuckFlag;
+ TRecentWnd<std::pair<ui64, ui64>> OutputQueueUtilization;
+ size_t NumEventsInReadyChannels = 0;
+
+ void SetOutputStuckFlag(bool state);
+ void SwitchStuckPeriod();
+
+ using TSendQueue = TList<TTcpPacketOutTask>;
+ TSendQueue SendQueue;
+ TSendQueue SendQueueCache;
+ TSendQueue::iterator SendQueuePos;
+ ui64 WriteBlockedCycles = 0; // start of current block period
+ TDuration WriteBlockedTotal; // total incremental duration that session has been blocked
+ ui64 BytesUnwritten = 0;
+
+ void TrimSendQueueCache();
+
+ TDuration GetWriteBlockedTotal() const {
+ if (ReceiveContext->WriteBlockedByFullSendBuffer) {
+ double blockedUs = NHPTimer::GetSeconds(GetCycleCountFast() - WriteBlockedCycles) * 1000000.0;
+ return WriteBlockedTotal + TDuration::MicroSeconds(blockedUs); // append current blocking period if any
+ } else {
+ return WriteBlockedTotal;
+ }
+ }
+
+ ui64 OutputCounter;
+ ui64 LastSentSerial = 0;
+
+ TInstant LastHandshakeDone;
+
+ TIntrusivePtr<NInterconnect::TStreamSocket> Socket;
+ TPollerToken::TPtr PollerToken;
+ ui32 SendBufferSize;
+ ui64 InflightDataAmount = 0;
+
+ std::unordered_map<TActorId, ui64, TActorId::THash> Subscribers;
+
+ // time at which we want to send confirmation packet even if there was no outgoing data
+ ui64 UnconfirmedBytes = 0;
+ TInstant ForcePacketTimestamp = TInstant::Max();
+ TPriorityQueue<TInstant, TVector<TInstant>, std::greater<TInstant>> FlushSchedule;
+ size_t MaxFlushSchedule = 0;
+ ui64 FlushEventsScheduled = 0;
+ ui64 FlushEventsProcessed = 0;
+
+ void SetForcePacketTimestamp(TDuration period);
+ void ScheduleFlush();
+ void HandleFlush();
+ void ResetFlushLogic();
+
+ void GenerateHttpInfo(TStringStream& str);
+
+ TIntrusivePtr<TReceiveContext> ReceiveContext;
+ TActorId ReceiverId;
+ TDuration Ping;
+
+ ui64 ConfirmPacketsForcedBySize = 0;
+ ui64 ConfirmPacketsForcedByTimeout = 0;
+
+ ui64 LastConfirmed = 0;
+
+ TEvHandshakeDone::TPtr PendingHandshakeDoneEvent;
+ bool StartHandshakeOnSessionClose = false;
+
+ ui64 EqualizeCounter = 0;
+ };
+
+ class TInterconnectSessionKiller
+ : public TActorBootstrapped<TInterconnectSessionKiller> {
+ ui32 RepliesReceived = 0;
+ ui32 RepliesNumber = 0;
+ TActorId LargestSession = TActorId();
+ ui64 MaxBufferSize = 0;
+ TInterconnectProxyCommon::TPtr Common;
+
+ public:
+ static constexpr EActivityType ActorActivityType() {
+ return INTERCONNECT_SESSION_KILLER;
+ }
+
+ TInterconnectSessionKiller(TInterconnectProxyCommon::TPtr common)
+ : Common(common)
+ {
+ }
+
+ void Bootstrap() {
+ auto sender = SelfId();
+ const auto eventFabric = [&sender](const TActorId& recp) -> IEventHandle* {
+ auto ev = new TEvSessionBufferSizeRequest();
+ return new IEventHandle(recp, sender, ev, IEventHandle::FlagTrackDelivery);
+ };
+ RepliesNumber = TlsActivationContext->ExecutorThread.ActorSystem->BroadcastToProxies(eventFabric);
+ Become(&TInterconnectSessionKiller::StateFunc);
+ }
+
+ STRICT_STFUNC(StateFunc,
+ hFunc(TEvSessionBufferSizeResponse, ProcessResponse)
+ cFunc(TEvents::TEvUndelivered::EventType, ProcessUndelivered)
+ )
+
+ void ProcessResponse(TEvSessionBufferSizeResponse::TPtr& ev) {
+ RepliesReceived++;
+ if (MaxBufferSize < ev->Get()->BufferSize) {
+ MaxBufferSize = ev->Get()->BufferSize;
+ LargestSession = ev->Get()->SessionID;
+ }
+ if (RepliesReceived == RepliesNumber) {
+ Send(LargestSession, new TEvents::TEvPoisonPill);
+ AtomicUnlock(&Common->StartedSessionKiller);
+ PassAway();
+ }
+ }
+
+ void ProcessUndelivered() {
+ RepliesReceived++;
+ }
+ };
+
+ void CreateSessionKillingActor(TInterconnectProxyCommon::TPtr common);
+
+}
diff --git a/library/cpp/actors/interconnect/load.cpp b/library/cpp/actors/interconnect/load.cpp
new file mode 100644
index 0000000000..2a8443da71
--- /dev/null
+++ b/library/cpp/actors/interconnect/load.cpp
@@ -0,0 +1,405 @@
+#include "load.h"
+#include "interconnect_common.h"
+#include "events_local.h"
+#include <library/cpp/actors/protos/services_common.pb.h>
+#include <library/cpp/actors/core/log.h>
+#include <library/cpp/actors/core/actor_bootstrapped.h>
+#include <library/cpp/actors/core/events.h>
+#include <library/cpp/actors/core/hfunc.h>
+#include <util/generic/queue.h>
+
+namespace NInterconnect {
+ using namespace NActors;
+
+ enum {
+ EvGenerateMessages = EventSpaceBegin(TEvents::ES_PRIVATE),
+ EvPublishResults,
+ EvQueryTrafficCounter,
+ EvTrafficCounter,
+ };
+
+ struct TEvQueryTrafficCounter : TEventLocal<TEvQueryTrafficCounter, EvQueryTrafficCounter> {};
+
+ struct TEvTrafficCounter : TEventLocal<TEvTrafficCounter, EvTrafficCounter> {
+ std::shared_ptr<std::atomic_uint64_t> Traffic;
+
+ TEvTrafficCounter(std::shared_ptr<std::atomic_uint64_t> traffic)
+ : Traffic(std::move(traffic))
+ {}
+ };
+
+ class TLoadResponderActor : public TActor<TLoadResponderActor> {
+ STRICT_STFUNC(StateFunc,
+ HFunc(TEvLoadMessage, Handle);
+ CFunc(TEvents::TSystem::PoisonPill, Die);
+ )
+
+ void Handle(TEvLoadMessage::TPtr& ev, const TActorContext& ctx) {
+ ui64 bytes = ev->Get()->CalculateSerializedSizeCached();
+ auto& record = ev->Get()->Record;
+ auto *hops = record.MutableHops();
+ while (!hops->empty() && !hops->begin()->HasNextHop()) {
+ record.ClearPayload();
+ ev->Get()->StripPayload();
+ hops->erase(hops->begin());
+ }
+ if (!hops->empty()) {
+ // extract actor id of the next hop
+ const TActorId nextHopActorId = ActorIdFromProto(hops->begin()->GetNextHop());
+ hops->erase(hops->begin());
+
+ // forward message to next hop; preserve flags and cookie
+ auto msg = MakeHolder<TEvLoadMessage>();
+ record.Swap(&msg->Record);
+ bytes += msg->CalculateSerializedSizeCached();
+ ctx.Send(nextHopActorId, msg.Release(), ev->Flags, ev->Cookie);
+ }
+ *Traffic += bytes;
+ }
+
+ public:
+ TLoadResponderActor(std::shared_ptr<std::atomic_uint64_t> traffic)
+ : TActor(&TLoadResponderActor::StateFunc)
+ , Traffic(std::move(traffic))
+ {}
+
+ static constexpr IActor::EActivityType ActorActivityType() {
+ return IActor::INTERCONNECT_LOAD_RESPONDER;
+ }
+
+ private:
+ std::shared_ptr<std::atomic_uint64_t> Traffic;
+ };
+
+ class TLoadResponderMasterActor : public TActorBootstrapped<TLoadResponderMasterActor> {
+ TVector<TActorId> Slaves;
+ ui32 SlaveIndex = 0;
+
+ STRICT_STFUNC(StateFunc,
+ HFunc(TEvLoadMessage, Handle);
+ HFunc(TEvQueryTrafficCounter, Handle);
+ CFunc(TEvents::TSystem::PoisonPill, Die);
+ )
+
+ void Handle(TEvLoadMessage::TPtr& ev, const TActorContext& ctx) {
+ ctx.ExecutorThread.ActorSystem->Send(ev->Forward(Slaves[SlaveIndex]));
+ if (++SlaveIndex == Slaves.size()) {
+ SlaveIndex = 0;
+ }
+ }
+
+ void Handle(TEvQueryTrafficCounter::TPtr ev, const TActorContext& ctx) {
+ ctx.Send(ev->Sender, new TEvTrafficCounter(Traffic));
+ }
+
+ void Die(const TActorContext& ctx) override {
+ for (const TActorId& actorId : Slaves) {
+ ctx.Send(actorId, new TEvents::TEvPoisonPill);
+ }
+ TActorBootstrapped::Die(ctx);
+ }
+
+ public:
+ static constexpr IActor::EActivityType ActorActivityType() {
+ return IActor::INTERCONNECT_LOAD_RESPONDER;
+ }
+
+ TLoadResponderMasterActor()
+ {}
+
+ void Bootstrap(const TActorContext& ctx) {
+ Become(&TLoadResponderMasterActor::StateFunc);
+ while (Slaves.size() < 10) {
+ Slaves.push_back(ctx.Register(new TLoadResponderActor(Traffic)));
+ }
+ }
+
+ private:
+ std::shared_ptr<std::atomic_uint64_t> Traffic = std::make_shared<std::atomic_uint64_t>();
+ };
+
+ IActor* CreateLoadResponderActor() {
+ return new TLoadResponderMasterActor();
+ }
+
+ TActorId MakeLoadResponderActorId(ui32 nodeId) {
+ char x[12] = {'I', 'C', 'L', 'o', 'a', 'd', 'R', 'e', 's', 'p', 'A', 'c'};
+ return TActorId(nodeId, TStringBuf(x, 12));
+ }
+
+ class TLoadActor: public TActorBootstrapped<TLoadActor> {
+ struct TEvGenerateMessages : TEventLocal<TEvGenerateMessages, EvGenerateMessages> {};
+ struct TEvPublishResults : TEventLocal<TEvPublishResults, EvPublishResults> {};
+
+ struct TMessageInfo {
+ TInstant SendTimestamp;
+
+ TMessageInfo(const TInstant& sendTimestamp)
+ : SendTimestamp(sendTimestamp)
+ {
+ }
+ };
+
+ const TLoadParams Params;
+ TInstant NextMessageTimestamp;
+ THashMap<TString, TMessageInfo> InFly;
+ ui64 NextId = 1;
+ TVector<TActorId> Hops;
+ TActorId FirstHop;
+ ui64 NumDropped = 0;
+ std::shared_ptr<std::atomic_uint64_t> Traffic;
+
+ public:
+ static constexpr IActor::EActivityType ActorActivityType() {
+ return IActor::INTERCONNECT_LOAD_ACTOR;
+ }
+
+ TLoadActor(const TLoadParams& params)
+ : Params(params)
+ {}
+
+ void Bootstrap(const TActorContext& ctx) {
+ Become(&TLoadActor::QueryTrafficCounter);
+ ctx.Send(MakeLoadResponderActorId(SelfId().NodeId()), new TEvQueryTrafficCounter);
+ }
+
+ void Handle(TEvTrafficCounter::TPtr ev, const TActorContext& ctx) {
+ Traffic = std::move(ev->Get()->Traffic);
+
+ for (const ui32 nodeId : Params.NodeHops) {
+ const TActorId& actorId = nodeId ? MakeLoadResponderActorId(nodeId) : TActorId();
+ if (!FirstHop) {
+ FirstHop = actorId;
+ } else {
+ Hops.push_back(actorId);
+ }
+ }
+
+ Hops.push_back(ctx.SelfID);
+
+ Become(&TLoadActor::StateFunc);
+ NextMessageTimestamp = ctx.Now();
+ ResetThroughput(NextMessageTimestamp, *Traffic);
+ GenerateMessages(ctx);
+ ctx.Schedule(Params.Duration, new TEvents::TEvPoisonPill);
+ SchedulePublishResults(ctx);
+ }
+
+ void GenerateMessages(const TActorContext& ctx) {
+ while (InFly.size() < Params.InFlyMax && ctx.Now() >= NextMessageTimestamp) {
+ // generate payload
+ const ui32 size = Params.SizeMin + RandomNumber(Params.SizeMax - Params.SizeMin + 1);
+
+ // generate message id
+ const ui64 cookie = NextId++;
+ TString id = Sprintf("%" PRIu64, cookie);
+
+ // create message and send it to the first hop
+ THolder<TEvLoadMessage> ev;
+ if (Params.UseProtobufWithPayload && size) {
+ auto buffer = TRopeAlignedBuffer::Allocate(size);
+ memset(buffer->GetBuffer(), '*', size);
+ ev.Reset(new TEvLoadMessage(Hops, id, TRope(buffer)));
+ } else {
+ TString payload;
+ if (size) {
+ payload = TString::Uninitialized(size);
+ memset(payload.Detach(), '*', size);
+ }
+ ev.Reset(new TEvLoadMessage(Hops, id, payload ? &payload : nullptr));
+ }
+ UpdateThroughput(ev->CalculateSerializedSizeCached());
+ ctx.Send(FirstHop, ev.Release(), IEventHandle::MakeFlags(Params.Channel, 0), cookie);
+
+ // register in the map
+ InFly.emplace(id, TMessageInfo(ctx.Now()));
+
+ // put item into timeout queue
+ PutTimeoutQueueItem(ctx, id);
+
+ const TDuration duration = TDuration::MicroSeconds(Params.IntervalMin.GetValue() +
+ RandomNumber(Params.IntervalMax.GetValue() - Params.IntervalMin.GetValue() + 1));
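+                // with soft load the schedule advances by the intended interval even when we are behind, which lets
+                // the actor burst to catch up and preserves the long-run rate; otherwise the next send is anchored
+                // to the current time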
+ if (Params.SoftLoad) {
+ NextMessageTimestamp += duration;
+ } else {
+ NextMessageTimestamp = ctx.Now() + duration;
+ }
+ }
+
+ // schedule next generate messages call
+ if (NextMessageTimestamp > ctx.Now() && InFly.size() < Params.InFlyMax) {
+ ctx.Schedule(NextMessageTimestamp - ctx.Now(), new TEvGenerateMessages);
+ }
+ }
+
+ void Handle(TEvLoadMessage::TPtr& ev, const TActorContext& ctx) {
+ const auto& record = ev->Get()->Record;
+ auto it = InFly.find(record.GetId());
+ if (it != InFly.end()) {
+ // record message rtt
+ const TDuration rtt = ctx.Now() - it->second.SendTimestamp;
+ UpdateHistogram(ctx.Now(), rtt);
+
+ // update throughput
+ UpdateThroughput(ev->Get()->CalculateSerializedSizeCached());
+
+ // remove message from the in fly map
+ InFly.erase(it);
+ } else {
+ ++NumDropped;
+ }
+ GenerateMessages(ctx);
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // RTT HISTOGRAM
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ const TDuration AggregationPeriod = TDuration::Seconds(20);
+ TDeque<std::pair<TInstant, TDuration>> Histogram;
+
+ void UpdateHistogram(TInstant when, TDuration rtt) {
+ Histogram.emplace_back(when, rtt);
+
+ const TInstant barrier = when - AggregationPeriod;
+ while (Histogram && Histogram.front().first < barrier) {
+ Histogram.pop_front();
+ }
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // THROUGHPUT
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ TInstant ThroughputFirstSample = TInstant::Zero();
+ ui64 ThroughputSamples = 0;
+ ui64 ThroughputBytes = 0;
+ ui64 TrafficAtBegin = 0;
+
+ void UpdateThroughput(ui64 bytes) {
+ ThroughputBytes += bytes;
+ ++ThroughputSamples;
+ }
+
+ void ResetThroughput(TInstant when, ui64 traffic) {
+ ThroughputFirstSample = when;
+ ThroughputSamples = 0;
+ ThroughputBytes = 0;
+ TrafficAtBegin = traffic;
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // TIMEOUT QUEUE OPERATIONS
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ TQueue<std::pair<TInstant, TString>> TimeoutQueue;
+
+ void PutTimeoutQueueItem(const TActorContext& ctx, TString id) {
+ TimeoutQueue.emplace(ctx.Now() + TDuration::Minutes(1), std::move(id));
+ if (TimeoutQueue.size() == 1) {
+ ScheduleWakeup(ctx);
+ }
+ }
+
+ void ScheduleWakeup(const TActorContext& ctx) {
+ ctx.Schedule(TimeoutQueue.front().first - ctx.Now(), new TEvents::TEvWakeup);
+ }
+
+ void HandleWakeup(const TActorContext& ctx) {
+ ui32 numDropped = 0;
+
+ while (TimeoutQueue && TimeoutQueue.front().first <= ctx.Now()) {
+ numDropped += InFly.erase(TimeoutQueue.front().second);
+ TimeoutQueue.pop();
+ }
+ if (TimeoutQueue) {
+ // we still have some elements in timeout queue, so schedule next wake up to tidy up
+ ScheduleWakeup(ctx);
+ }
+
+ GenerateMessages(ctx);
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // RESULT PUBLISHING
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ const TDuration ResultPublishPeriod = TDuration::Seconds(15);
+
+ void SchedulePublishResults(const TActorContext& ctx) {
+ ctx.Schedule(ResultPublishPeriod, new TEvPublishResults);
+ }
+
+ void PublishResults(const TActorContext& ctx, bool schedule = true) {
+ const TInstant now = ctx.Now();
+
+ TStringStream msg;
+
+ msg << "Load# '" << Params.Name << "'";
+
+ msg << " Throughput# ";
+ const TDuration duration = now - ThroughputFirstSample;
+ const ui64 traffic = *Traffic;
+ msg << "{window# " << duration
+ << " bytes# " << ThroughputBytes
+ << " samples# " << ThroughputSamples
+ << " b/s# " << ui64(ThroughputBytes * 1000000 / duration.MicroSeconds())
+ << " common# " << ui64((traffic - TrafficAtBegin) * 1000000 / duration.MicroSeconds())
+ << "}";
+ ResetThroughput(now, traffic);
+
+ msg << " RTT# ";
+ if (Histogram) {
+ const TDuration duration = Histogram.back().first - Histogram.front().first;
+ msg << "{window# " << duration << " samples# " << Histogram.size();
+ TVector<TDuration> v;
+ v.reserve(Histogram.size());
+ for (const auto& item : Histogram) {
+ v.push_back(item.second);
+ }
+ std::sort(v.begin(), v.end());
+ for (double q : {0.5, 0.9, 0.99, 0.999, 0.9999, 1.0}) {
+ const size_t pos = q * (v.size() - 1);
+ msg << Sprintf(" %.4f# %s", q, v[pos].ToString().data());
+ }
+ msg << "}";
+ } else {
+ msg << "<empty>";
+ }
+
+ msg << " NumDropped# " << NumDropped;
+
+ if (!schedule) {
+ msg << " final";
+ }
+
+ LOG_NOTICE(ctx, NActorsServices::INTERCONNECT_SPEED_TEST, "%s", msg.Str().data());
+
+ if (schedule) {
+ SchedulePublishResults(ctx);
+ }
+ }
+
+ STRICT_STFUNC(QueryTrafficCounter,
+ HFunc(TEvTrafficCounter, Handle);
+ )
+
+ STRICT_STFUNC(StateFunc,
+ CFunc(TEvents::TSystem::PoisonPill, Die);
+ CFunc(TEvents::TSystem::Wakeup, HandleWakeup);
+ CFunc(EvPublishResults, PublishResults);
+ CFunc(EvGenerateMessages, GenerateMessages);
+ HFunc(TEvLoadMessage, Handle);
+ )
+
+ void Die(const TActorContext& ctx) override {
+ PublishResults(ctx, false);
+ TActorBootstrapped::Die(ctx);
+ }
+ };
+
+ IActor* CreateLoadActor(const TLoadParams& params) {
+ return new TLoadActor(params);
+ }
+
+}
diff --git a/library/cpp/actors/interconnect/load.h b/library/cpp/actors/interconnect/load.h
new file mode 100644
index 0000000000..0a01a0dc04
--- /dev/null
+++ b/library/cpp/actors/interconnect/load.h
@@ -0,0 +1,24 @@
+#pragma once
+
+#include <library/cpp/actors/core/actor.h>
+
+namespace NInterconnect {
+ // load responder -- lives on every node as a service actor
+ NActors::IActor* CreateLoadResponderActor();
+ NActors::TActorId MakeLoadResponderActorId(ui32 node);
+
+ // load actor -- generates load with specific parameters
+ struct TLoadParams {
+ TString Name;
+ ui32 Channel;
+ TVector<ui32> NodeHops; // node ids for the message route
+ ui32 SizeMin, SizeMax; // min and max size for payloads
+ ui32 InFlyMax; // maximum number of in-flight messages
+ TDuration IntervalMin, IntervalMax; // min and max intervals between sending messages
+ bool SoftLoad; // is the load soft?
+ TDuration Duration; // test duration
+ bool UseProtobufWithPayload; // store payload separately
+ };
+ NActors::IActor* CreateLoadActor(const TLoadParams& params);
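+ // illustrative wiring sketch (assumes a running TActorSystem instance named actorSystem):
+ // TLoadParams params;
+ // params.Name = "ic-load";
+ // params.NodeHops = {1, 2, 1}; // node 1 -> node 2 -> back to node 1
+ // params.SizeMin = params.SizeMax = 4096;
+ // params.InFlyMax = 16;
+ // params.IntervalMin = params.IntervalMax = TDuration::MilliSeconds(10);
+ // params.Duration = TDuration::Seconds(60);
+ // actorSystem.Register(CreateLoadActor(params));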
+
+}
diff --git a/library/cpp/actors/interconnect/logging.h b/library/cpp/actors/interconnect/logging.h
new file mode 100644
index 0000000000..c429d1cade
--- /dev/null
+++ b/library/cpp/actors/interconnect/logging.h
@@ -0,0 +1,68 @@
+#pragma once
+
+#include <library/cpp/actors/core/log.h>
+#include <library/cpp/actors/protos/services_common.pb.h>
+
+#define LOG_LOG_IC_X(component, marker, priority, ...) \
+ do { \
+ LOG_LOG(this->GetActorContext(), (priority), (component), "%s " marker " %s", LogPrefix.data(), Sprintf(__VA_ARGS__).data()); \
+ } while (false)
+
+#define LOG_LOG_NET_X(priority, NODE_ID, FMT, ...) \
+ do { \
+ const TActorContext& ctx = this->GetActorContext(); \
+ LOG_LOG(ctx, (priority), ::NActorsServices::INTERCONNECT_NETWORK, "[%" PRIu32 " <-> %" PRIu32 "] %s", \
+ ctx.SelfID.NodeId(), (NODE_ID), Sprintf(FMT, __VA_ARGS__).data()); \
+ } while (false)
+
+#define LOG_LOG_IC(component, marker, priority, ...) \
+ do { \
+ LOG_LOG(::NActors::TActivationContext::AsActorContext(), (priority), (component), "%s " marker " %s", LogPrefix.data(), Sprintf(__VA_ARGS__).data()); \
+ } while (false)
+
+#define LOG_LOG_NET(priority, NODE_ID, FMT, ...) \
+ do { \
+ const TActorContext& ctx = ::NActors::TActivationContext::AsActorContext(); \
+ LOG_LOG(ctx, (priority), ::NActorsServices::INTERCONNECT_NETWORK, "[%" PRIu32 " <-> %" PRIu32 "] %s", \
+ ctx.SelfID.NodeId(), (NODE_ID), Sprintf(FMT, __VA_ARGS__).data()); \
+ } while (false)
+
+#define LOG_EMER_IC(marker, ...) LOG_LOG_IC(::NActorsServices::INTERCONNECT, marker, ::NActors::NLog::PRI_EMER, __VA_ARGS__)
+#define LOG_ALERT_IC(marker, ...) LOG_LOG_IC(::NActorsServices::INTERCONNECT, marker, ::NActors::NLog::PRI_ALERT, __VA_ARGS__)
+#define LOG_CRIT_IC(marker, ...) LOG_LOG_IC(::NActorsServices::INTERCONNECT, marker, ::NActors::NLog::PRI_CRIT, __VA_ARGS__)
+#define LOG_ERROR_IC(marker, ...) LOG_LOG_IC(::NActorsServices::INTERCONNECT, marker, ::NActors::NLog::PRI_ERROR, __VA_ARGS__)
+#define LOG_WARN_IC(marker, ...) LOG_LOG_IC(::NActorsServices::INTERCONNECT, marker, ::NActors::NLog::PRI_WARN, __VA_ARGS__)
+#define LOG_NOTICE_IC(marker, ...) LOG_LOG_IC(::NActorsServices::INTERCONNECT, marker, ::NActors::NLog::PRI_NOTICE, __VA_ARGS__)
+#define LOG_INFO_IC(marker, ...) LOG_LOG_IC(::NActorsServices::INTERCONNECT, marker, ::NActors::NLog::PRI_INFO, __VA_ARGS__)
+#define LOG_DEBUG_IC(marker, ...) LOG_LOG_IC(::NActorsServices::INTERCONNECT, marker, ::NActors::NLog::PRI_DEBUG, __VA_ARGS__)
+
+#define LOG_EMER_IC_SESSION(marker, ...) LOG_LOG_IC(::NActorsServices::INTERCONNECT_SESSION, marker, ::NActors::NLog::PRI_EMER, __VA_ARGS__)
+#define LOG_ALERT_IC_SESSION(marker, ...) LOG_LOG_IC(::NActorsServices::INTERCONNECT_SESSION, marker, ::NActors::NLog::PRI_ALERT, __VA_ARGS__)
+#define LOG_CRIT_IC_SESSION(marker, ...) LOG_LOG_IC(::NActorsServices::INTERCONNECT_SESSION, marker, ::NActors::NLog::PRI_CRIT, __VA_ARGS__)
+#define LOG_ERROR_IC_SESSION(marker, ...) LOG_LOG_IC(::NActorsServices::INTERCONNECT_SESSION, marker, ::NActors::NLog::PRI_ERROR, __VA_ARGS__)
+#define LOG_WARN_IC_SESSION(marker, ...) LOG_LOG_IC(::NActorsServices::INTERCONNECT_SESSION, marker, ::NActors::NLog::PRI_WARN, __VA_ARGS__)
+#define LOG_NOTICE_IC_SESSION(marker, ...) LOG_LOG_IC(::NActorsServices::INTERCONNECT_SESSION, marker, ::NActors::NLog::PRI_NOTICE, __VA_ARGS__)
+#define LOG_INFO_IC_SESSION(marker, ...) LOG_LOG_IC(::NActorsServices::INTERCONNECT_SESSION, marker, ::NActors::NLog::PRI_INFO, __VA_ARGS__)
+#define LOG_DEBUG_IC_SESSION(marker, ...) LOG_LOG_IC(::NActorsServices::INTERCONNECT_SESSION, marker, ::NActors::NLog::PRI_DEBUG, __VA_ARGS__)
+
+#define LOG_NOTICE_NET(NODE_ID, FMT, ...) LOG_LOG_NET(::NActors::NLog::PRI_NOTICE, NODE_ID, FMT, __VA_ARGS__)
+#define LOG_DEBUG_NET(NODE_ID, FMT, ...) LOG_LOG_NET(::NActors::NLog::PRI_DEBUG, NODE_ID, FMT, __VA_ARGS__)
+
+namespace NActors {
+ class TInterconnectLoggingBase {
+ protected:
+ const TString LogPrefix;
+
+ public:
+ TInterconnectLoggingBase() = default;
+
+ TInterconnectLoggingBase(const TString& prefix)
+ : LogPrefix(prefix)
+ {
+ }
+
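+ // LogPrefix is semantically write-once; the const_cast swap lets const-qualified owners install it after construction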
+ void SetPrefix(TString logPrefix) const {
+ logPrefix.swap(const_cast<TString&>(LogPrefix));
+ }
+ };
+}
diff --git a/library/cpp/actors/interconnect/mock/ic_mock.cpp b/library/cpp/actors/interconnect/mock/ic_mock.cpp
new file mode 100644
index 0000000000..884503e602
--- /dev/null
+++ b/library/cpp/actors/interconnect/mock/ic_mock.cpp
@@ -0,0 +1,298 @@
+#include "ic_mock.h"
+#include <library/cpp/actors/core/interconnect.h>
+#include <util/system/yield.h>
+#include <thread>
+
+namespace NActors {
+
+ class TInterconnectMock::TImpl {
+ enum {
+ EvInject = EventSpaceBegin(TEvents::ES_PRIVATE),
+ EvCheckSession,
+ EvRam,
+ };
+
+ struct TEvInject : TEventLocal<TEvInject, EvInject> {
+ std::deque<std::unique_ptr<IEventHandle>> Messages;
+ const TScopeId OriginScopeId;
+ const ui64 SenderSessionId;
+
+ TEvInject(std::deque<std::unique_ptr<IEventHandle>>&& messages, const TScopeId& originScopeId, ui64 senderSessionId)
+ : Messages(std::move(messages))
+ , OriginScopeId(originScopeId)
+ , SenderSessionId(senderSessionId)
+ {}
+ };
+
+ class TProxyMockActor;
+
+ class TConnectionState {
+ struct TPeerInfo {
+ TRWMutex Mutex;
+ TActorSystem *ActorSystem = nullptr;
+ TActorId ProxyId;
+ };
+
+ const ui64 Key;
+ TPeerInfo PeerInfo[2];
+ std::atomic_uint64_t SessionId = 0;
+
+ public:
+ TConnectionState(ui64 key)
+ : Key(key)
+ {}
+
+ void Attach(ui32 nodeId, TActorSystem *as, const TActorId& actorId) {
+ TPeerInfo *peer = GetPeer(nodeId);
+ auto guard = TWriteGuard(peer->Mutex);
+ Y_VERIFY(!peer->ActorSystem);
+ peer->ActorSystem = as;
+ peer->ProxyId = actorId;
+ as->DeferPreStop([peer] {
+ auto guard = TWriteGuard(peer->Mutex);
+ peer->ActorSystem = nullptr;
+ });
+ }
+
+ void Inject(ui32 peerNodeId, std::deque<std::unique_ptr<IEventHandle>>&& messages,
+ const TScopeId& originScopeId, ui64 senderSessionId) {
+ TPeerInfo *peer = GetPeer(peerNodeId);
+ auto guard = TReadGuard(peer->Mutex);
+ if (peer->ActorSystem) {
+ peer->ActorSystem->Send(new IEventHandle(peer->ProxyId, TActorId(), new TEvInject(std::move(messages),
+ originScopeId, senderSessionId)));
+ } else {
+ for (auto&& ev : messages) {
+ TActivationContext::Send(ev->ForwardOnNondelivery(TEvents::TEvUndelivered::Disconnected));
+ }
+ }
+ }
+
+ ui64 GetValidSessionId() const {
+ return SessionId;
+ }
+
+ void InvalidateSessionId(ui32 peerNodeId) {
+ ++SessionId;
+ TPeerInfo *peer = GetPeer(peerNodeId);
+ auto guard = TReadGuard(peer->Mutex);
+ if (peer->ActorSystem) {
+ peer->ActorSystem->Send(new IEventHandle(EvCheckSession, 0, peer->ProxyId, {}, nullptr, 0));
+ }
+ }
+
+ private:
+ TPeerInfo *GetPeer(ui32 nodeId) {
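+ // Key packs both node ids: low 32 bits hold the smaller id (PeerInfo[0]), high 32 bits the larger one (PeerInfo[1])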
+ if (nodeId == ui32(Key)) {
+ return PeerInfo;
+ } else if (nodeId == ui32(Key >> 32)) {
+ return PeerInfo + 1;
+ } else {
+ Y_FAIL();
+ }
+ }
+ };
+
+ class TProxyMockActor : public TActor<TProxyMockActor> {
+ class TSessionMockActor : public TActor<TSessionMockActor> {
+ std::map<TActorId, ui64> Subscribers;
+ TProxyMockActor* const Proxy;
+ std::deque<std::unique_ptr<IEventHandle>> Queue;
+
+ public:
+ const ui64 SessionId;
+
+ public:
+ TSessionMockActor(TProxyMockActor *proxy, ui64 sessionId)
+ : TActor(&TThis::StateFunc)
+ , Proxy(proxy)
+ , SessionId(sessionId)
+ {}
+
+ void Terminate() {
+ for (auto&& ev : std::exchange(Queue, {})) {
+ TActivationContext::Send(ev->ForwardOnNondelivery(TEvents::TEvUndelivered::Disconnected));
+ }
+ for (const auto& kv : Subscribers) {
+ Send(kv.first, new TEvInterconnect::TEvNodeDisconnected(Proxy->PeerNodeId), 0, kv.second);
+ }
+ Y_VERIFY(Proxy->Session == this);
+ Proxy->Session = nullptr;
+ PassAway();
+ }
+
+ void HandleForward(TAutoPtr<IEventHandle> ev) {
+ if (ev->Flags & IEventHandle::FlagSubscribeOnSession) {
+ Subscribe(ev->Sender, ev->Cookie);
+ }
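+ // an empty queue means no EvRam is in flight; arm one so the batch gets flushed in a separate activation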
+ if (Queue.empty()) {
+ TActivationContext::Send(new IEventHandle(EvRam, 0, SelfId(), {}, {}, 0));
+ }
+ Queue.emplace_back(ev.Release());
+ }
+
+ void HandleRam() {
+ if (SessionId != Proxy->State.GetValidSessionId()) {
+ Terminate();
+ } else {
+ Proxy->PeerInject(std::exchange(Queue, {}));
+ }
+ }
+
+ void Handle(TEvInterconnect::TEvConnectNode::TPtr ev) {
+ Subscribe(ev->Sender, ev->Cookie);
+ }
+
+ void Handle(TEvents::TEvSubscribe::TPtr ev) {
+ Subscribe(ev->Sender, ev->Cookie);
+ }
+
+ void Handle(TEvents::TEvUnsubscribe::TPtr ev) {
+ Subscribers.erase(ev->Sender);
+ }
+
+ void HandlePoison() {
+ Proxy->Disconnect();
+ }
+
+ STRICT_STFUNC(StateFunc,
+ fFunc(TEvInterconnect::EvForward, HandleForward)
+ hFunc(TEvInterconnect::TEvConnectNode, Handle)
+ hFunc(TEvents::TEvSubscribe, Handle)
+ hFunc(TEvents::TEvUnsubscribe, Handle)
+ cFunc(TEvents::TSystem::Poison, HandlePoison)
+ cFunc(EvRam, HandleRam)
+ )
+
+ private:
+ void Subscribe(const TActorId& actorId, ui64 cookie) {
+ Subscribers[actorId] = cookie;
+ Send(actorId, new TEvInterconnect::TEvNodeConnected(Proxy->PeerNodeId), 0, cookie);
+ }
+ };
+
+ friend class TSessionMockActor;
+
+ const ui32 NodeId;
+ const ui32 PeerNodeId;
+ TConnectionState& State;
+ const TInterconnectProxyCommon::TPtr Common;
+ TSessionMockActor *Session = nullptr;
+
+ public:
+ TProxyMockActor(ui32 nodeId, ui32 peerNodeId, TConnectionState& state, TInterconnectProxyCommon::TPtr common)
+ : TActor(&TThis::StateFunc)
+ , NodeId(nodeId)
+ , PeerNodeId(peerNodeId)
+ , State(state)
+ , Common(std::move(common))
+ {}
+
+ void Registered(TActorSystem *as, const TActorId& parent) override {
+ TActor::Registered(as, parent);
+ State.Attach(NodeId, as, SelfId());
+ }
+
+ void Handle(TEvInject::TPtr ev) {
+ auto *msg = ev->Get();
+ if (Session && Session->SessionId != msg->SenderSessionId) {
+ return; // drop messages from other sessions
+ }
+ if (auto *session = GetSession()) {
+ for (auto&& ev : ev->Get()->Messages) {
+ auto fw = std::make_unique<IEventHandle>(
+ session->SelfId(),
+ ev->Type,
+ ev->Flags & ~IEventHandle::FlagForwardOnNondelivery,
+ ev->Recipient,
+ ev->Sender,
+ ev->ReleaseChainBuffer(),
+ ev->Cookie,
+ msg->OriginScopeId,
+ std::move(ev->TraceId)
+ );
+ if (!Common->EventFilter || Common->EventFilter->CheckIncomingEvent(*fw, Common->LocalScopeId)) {
+ TActivationContext::Send(fw.release());
+ }
+ }
+ }
+ }
+
+ void PassAway() override {
+ Disconnect();
+ TActor::PassAway();
+ }
+
+ TSessionMockActor *GetSession() {
+ CheckSession();
+ if (!Session) {
+ Session = new TSessionMockActor(this, State.GetValidSessionId());
+ RegisterWithSameMailbox(Session);
+ }
+ return Session;
+ }
+
+ void HandleSessionEvent(TAutoPtr<IEventHandle> ev) {
+ auto *session = GetSession();
+ InvokeOtherActor(*session, &TSessionMockActor::Receive, ev,
+ TActivationContext::ActorContextFor(session->SelfId()));
+ }
+
+ void Disconnect() {
+ State.InvalidateSessionId(PeerNodeId);
+ if (Session) {
+ Session->Terminate();
+ }
+ }
+
+ void CheckSession() {
+ if (Session && Session->SessionId != State.GetValidSessionId()) {
+ Session->Terminate();
+ }
+ }
+
+ void PeerInject(std::deque<std::unique_ptr<IEventHandle>>&& messages) {
+ Y_VERIFY(Session);
+ return State.Inject(PeerNodeId, std::move(messages), Common->LocalScopeId, Session->SessionId);
+ }
+
+ STRICT_STFUNC(StateFunc,
+ cFunc(TEvents::TSystem::Poison, PassAway)
+ fFunc(TEvInterconnect::EvForward, HandleSessionEvent)
+ fFunc(TEvInterconnect::EvConnectNode, HandleSessionEvent)
+ fFunc(TEvents::TSystem::Subscribe, HandleSessionEvent)
+ fFunc(TEvents::TSystem::Unsubscribe, HandleSessionEvent)
+ cFunc(TEvInterconnect::EvDisconnect, Disconnect)
+ IgnoreFunc(TEvInterconnect::TEvClosePeerSocket)
+ IgnoreFunc(TEvInterconnect::TEvCloseInputSession)
+ cFunc(TEvInterconnect::EvPoisonSession, Disconnect)
+ hFunc(TEvInject, Handle)
+ cFunc(EvCheckSession, CheckSession)
+ )
+ };
+
+ std::unordered_map<ui64, TConnectionState> States;
+
+ public:
+ IActor *CreateProxyMock(ui32 nodeId, ui32 peerNodeId, TInterconnectProxyCommon::TPtr common) {
+ Y_VERIFY(nodeId != peerNodeId);
+ Y_VERIFY(nodeId);
+ Y_VERIFY(peerNodeId);
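+ // order-independent key: both directions of a node pair share the same TConnectionState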
+ const ui64 key = std::min(nodeId, peerNodeId) | ui64(std::max(nodeId, peerNodeId)) << 32;
+ auto it = States.try_emplace(key, key).first;
+ return new TProxyMockActor(nodeId, peerNodeId, it->second, std::move(common));
+ }
+ };
+
+ TInterconnectMock::TInterconnectMock()
+ : Impl(std::make_unique<TImpl>())
+ {}
+
+ TInterconnectMock::~TInterconnectMock()
+ {}
+
+ IActor *TInterconnectMock::CreateProxyMock(ui32 nodeId, ui32 peerNodeId, TInterconnectProxyCommon::TPtr common) {
+ return Impl->CreateProxyMock(nodeId, peerNodeId, std::move(common));
+ }
+
+} // NActors
diff --git a/library/cpp/actors/interconnect/mock/ic_mock.h b/library/cpp/actors/interconnect/mock/ic_mock.h
new file mode 100644
index 0000000000..636bdc2b7f
--- /dev/null
+++ b/library/cpp/actors/interconnect/mock/ic_mock.h
@@ -0,0 +1,19 @@
+#pragma once
+
+#include <library/cpp/actors/core/actor.h>
+
+#include <library/cpp/actors/interconnect/interconnect_common.h>
+
+namespace NActors {
+
+ class TInterconnectMock {
+ class TImpl;
+ std::unique_ptr<TImpl> Impl;
+
+ public:
+ TInterconnectMock();
+ ~TInterconnectMock();
+ IActor *CreateProxyMock(ui32 nodeId, ui32 peerNodeId, TInterconnectProxyCommon::TPtr common);
+ };
+
+} // NActors
diff --git a/library/cpp/actors/interconnect/mock/tsan.supp b/library/cpp/actors/interconnect/mock/tsan.supp
new file mode 100644
index 0000000000..19fd059419
--- /dev/null
+++ b/library/cpp/actors/interconnect/mock/tsan.supp
@@ -0,0 +1 @@
+deadlock:Attach
diff --git a/library/cpp/actors/interconnect/mock/ya.make b/library/cpp/actors/interconnect/mock/ya.make
new file mode 100644
index 0000000000..19a2834162
--- /dev/null
+++ b/library/cpp/actors/interconnect/mock/ya.make
@@ -0,0 +1,16 @@
+LIBRARY()
+
+OWNER(alexvru)
+
+SRCS(
+ ic_mock.cpp
+ ic_mock.h
+)
+
+SUPPRESSIONS(tsan.supp)
+
+PEERDIR(
+ library/cpp/actors/interconnect
+)
+
+END()
diff --git a/library/cpp/actors/interconnect/packet.cpp b/library/cpp/actors/interconnect/packet.cpp
new file mode 100644
index 0000000000..e2c289ed59
--- /dev/null
+++ b/library/cpp/actors/interconnect/packet.cpp
@@ -0,0 +1,32 @@
+#include "packet.h"
+
+#include <library/cpp/actors/core/probes.h>
+
+#include <util/system/datetime.h>
+
+LWTRACE_USING(ACTORLIB_PROVIDER);
+
+ui32 TEventHolder::Fill(IEventHandle& ev) {
+ Serial = 0;
+ Descr.Type = ev.Type;
+ Descr.Flags = ev.Flags;
+ Descr.Recipient = ev.Recipient;
+ Descr.Sender = ev.Sender;
+ Descr.Cookie = ev.Cookie;
+ ev.TraceId.Serialize(&Descr.TraceId);
+ ForwardRecipient = ev.GetForwardOnNondeliveryRecipient();
+ EventActuallySerialized = 0;
+ Descr.Checksum = 0;
+
+ if (ev.HasBuffer()) {
+ Buffer = ev.ReleaseChainBuffer();
+ EventSerializedSize = Buffer->GetSize();
+ } else if (ev.HasEvent()) {
+ Event.Reset(ev.ReleaseBase());
+ EventSerializedSize = Event->CalculateSerializedSize();
+ } else {
+ EventSerializedSize = 0;
+ }
+
+ return EventSerializedSize;
+}
diff --git a/library/cpp/actors/interconnect/packet.h b/library/cpp/actors/interconnect/packet.h
new file mode 100644
index 0000000000..4ba50a2b5f
--- /dev/null
+++ b/library/cpp/actors/interconnect/packet.h
@@ -0,0 +1,324 @@
+#pragma once
+
+#include <library/cpp/actors/core/event_pb.h>
+#include <library/cpp/actors/core/event_load.h>
+#include <library/cpp/actors/core/events.h>
+#include <library/cpp/actors/core/actor.h>
+#include <library/cpp/containers/stack_vector/stack_vec.h>
+#include <library/cpp/actors/util/rope.h>
+#include <library/cpp/actors/prof/tag.h>
+#include <library/cpp/digest/crc32c/crc32c.h>
+#include <library/cpp/lwtrace/shuttle.h>
+#include <util/generic/string.h>
+#include <util/generic/list.h>
+
+#ifndef FORCE_EVENT_CHECKSUM
+#define FORCE_EVENT_CHECKSUM 0
+#endif
+
+using NActors::IEventBase;
+using NActors::IEventHandle;
+using NActors::TActorId;
+using NActors::TConstIoVec;
+using NActors::TEventSerializedData;
+
+Y_FORCE_INLINE ui32 Crc32cExtendMSanCompatible(ui32 checksum, const void *data, size_t len) {
+ if constexpr (NSan::MSanIsOn()) {
+ const char *begin = static_cast<const char*>(data);
+ const char *end = begin + len;
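+ // the hardware CRC may read in aligned 16-byte blocks, so round the range outward and unpoison it to avoid MSan false positives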
+ begin -= reinterpret_cast<uintptr_t>(begin) & 15;
+ end += -reinterpret_cast<uintptr_t>(end) & 15;
+ NSan::Unpoison(begin, end - begin);
+ }
+ return Crc32cExtend(checksum, data, len);
+}
+
+struct TSessionParams {
+ bool Encryption = {};
+ bool UseModernFrame = {};
+ bool AuthOnly = {};
+ TString AuthCN;
+ NActors::TScopeId PeerScopeId;
+};
+
+struct TTcpPacketHeader_v1 {
+ ui32 HeaderCRC32;
+ ui32 PayloadCRC32;
+ ui64 Confirm;
+ ui64 Serial;
+ ui64 DataSize;
+
+ inline bool Check() const {
+ ui32 actual = Crc32cExtendMSanCompatible(0, &PayloadCRC32, sizeof(TTcpPacketHeader_v1) - sizeof(HeaderCRC32));
+ return actual == HeaderCRC32;
+ }
+
+ inline void Sign() {
+ HeaderCRC32 = Crc32cExtendMSanCompatible(0, &PayloadCRC32, sizeof(TTcpPacketHeader_v1) - sizeof(HeaderCRC32));
+ }
+
+ TString ToString() const {
+ return Sprintf("{Confirm# %" PRIu64 " Serial# %" PRIu64 " DataSize# %" PRIu64 "}", Confirm, Serial, DataSize);
+ }
+};
+
+#pragma pack(push, 1)
+struct TTcpPacketHeader_v2 {
+ ui64 Confirm;
+ ui64 Serial;
+ ui32 Checksum; // for the whole frame
+ ui16 PayloadLength;
+};
+#pragma pack(pop)
+
+union TTcpPacketBuf {
+ static constexpr ui64 PingRequestMask = 0x8000000000000000ULL;
+ static constexpr ui64 PingResponseMask = 0x4000000000000000ULL;
+ static constexpr ui64 ClockMask = 0x2000000000000000ULL;
+
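+ // the whole frame must fit in two 4 KiB pages; 96 bytes of slack and the larger of the two headers are reserved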
+ static constexpr size_t PacketDataLen = 4096 * 2 - 96 - Max(sizeof(TTcpPacketHeader_v1), sizeof(TTcpPacketHeader_v2));
+ struct {
+ TTcpPacketHeader_v1 Header;
+ char Data[PacketDataLen];
+ } v1;
+ struct {
+ TTcpPacketHeader_v2 Header;
+ char Data[PacketDataLen];
+ } v2;
+};
+
+#pragma pack(push, 1)
+struct TEventDescr {
+ ui32 Type;
+ ui32 Flags;
+ TActorId Recipient;
+ TActorId Sender;
+ ui64 Cookie;
+ // Wilson trace id is stored in serialized form to avoid a complex member whose copy constructor is prohibited
+ NWilson::TTraceId::TSerializedTraceId TraceId;
+ ui32 Checksum;
+};
+#pragma pack(pop)
+
+struct TEventHolder : TNonCopyable {
+ TEventDescr Descr;
+ TActorId ForwardRecipient;
+ THolder<IEventBase> Event;
+ TIntrusivePtr<TEventSerializedData> Buffer;
+ ui64 Serial;
+ ui32 EventSerializedSize;
+ ui32 EventActuallySerialized;
+ mutable NLWTrace::TOrbit Orbit;
+
+ ui32 Fill(IEventHandle& ev);
+
+ void InitChecksum() {
+ Descr.Checksum = 0;
+ }
+
+ void UpdateChecksum(const TSessionParams& params, const void *buffer, size_t len) {
+ if (FORCE_EVENT_CHECKSUM || !params.UseModernFrame) {
+ Descr.Checksum = Crc32cExtendMSanCompatible(Descr.Checksum, buffer, len);
+ }
+ }
+
+ void ForwardOnNondelivery(bool unsure) {
+ TEventDescr& d = Descr;
+ const TActorId& r = d.Recipient;
+ const TActorId& s = d.Sender;
+ const TActorId *f = ForwardRecipient ? &ForwardRecipient : nullptr;
+ auto ev = Event
+ ? std::make_unique<IEventHandle>(r, s, Event.Release(), d.Flags, d.Cookie, f, NWilson::TTraceId(d.TraceId))
+ : std::make_unique<IEventHandle>(d.Type, d.Flags, r, s, std::move(Buffer), d.Cookie, f, NWilson::TTraceId(d.TraceId));
+ NActors::TActivationContext::Send(ev->ForwardOnNondelivery(NActors::TEvents::TEvUndelivered::Disconnected, unsure));
+ }
+
+ void Clear() {
+ Event.Reset();
+ Buffer.Reset();
+ Orbit.Reset();
+ }
+};
+
+namespace NActors {
+ class TEventOutputChannel;
+}
+
+struct TTcpPacketOutTask : TNonCopyable {
+ const TSessionParams& Params;
+ TTcpPacketBuf Packet;
+ size_t DataSize;
+ TStackVec<TConstIoVec, 32> Bufs;
+ size_t BufferIndex;
+ size_t FirstBufferOffset;
+ bool TriedWriting;
+ char *FreeArea;
+ char *End;
+ mutable NLWTrace::TOrbit Orbit;
+
+public:
+ TTcpPacketOutTask(const TSessionParams& params)
+ : Params(params)
+ {
+ Reuse();
+ }
+
+ template<typename T>
+ auto ApplyToHeader(T&& callback) {
+ return Params.UseModernFrame ? callback(Packet.v2.Header) : callback(Packet.v1.Header);
+ }
+
+ template<typename T>
+ auto ApplyToHeader(T&& callback) const {
+ return Params.UseModernFrame ? callback(Packet.v2.Header) : callback(Packet.v1.Header);
+ }
+
+ bool IsAtBegin() const {
+ return !BufferIndex && !FirstBufferOffset && !TriedWriting;
+ }
+
+ void MarkTriedWriting() {
+ TriedWriting = true;
+ }
+
+ void Reuse() {
+ DataSize = 0;
+ ApplyToHeader([this](auto& header) { Bufs.assign(1, {&header, sizeof(header)}); });
+ BufferIndex = 0;
+ FirstBufferOffset = 0;
+ TriedWriting = false;
+ FreeArea = Params.UseModernFrame ? Packet.v2.Data : Packet.v1.Data;
+ End = FreeArea + TTcpPacketBuf::PacketDataLen;
+ Orbit.Reset();
+ }
+
+ bool IsEmpty() const {
+ return !DataSize;
+ }
+
+ void SetMetadata(ui64 serial, ui64 confirm) {
+ ApplyToHeader([&](auto& header) {
+ header.Serial = serial;
+ header.Confirm = confirm;
+ });
+ }
+
+ void UpdateConfirmIfPossible(ui64 confirm) {
+ // v2 checksums cover the whole frame, so a mere confirm update would force full recalculation; update in place for v1 only
+ if (!Params.UseModernFrame && IsAtBegin() && confirm != Packet.v1.Header.Confirm) {
+ Packet.v1.Header.Confirm = confirm;
+ Packet.v1.Header.Sign();
+ }
+ }
+
+ size_t GetDataSize() const { return DataSize; }
+
+ ui64 GetSerial() const {
+ return ApplyToHeader([](auto& header) { return header.Serial; });
+ }
+
+ bool Confirmed(ui64 confirm) const {
+ return ApplyToHeader([&](auto& header) { return IsEmpty() || header.Serial <= confirm; });
+ }
+
+ void *GetFreeArea() {
+ return FreeArea;
+ }
+
+ size_t GetVirtualFreeAmount() const {
+ return TTcpPacketBuf::PacketDataLen - DataSize;
+ }
+
+ void AppendBuf(const void *buf, size_t size) {
+ DataSize += size;
+ Y_VERIFY_DEBUG(DataSize <= TTcpPacketBuf::PacketDataLen, "DataSize# %zu AppendBuf buf# %p size# %zu"
+ " FreeArea# %p End# %p", DataSize, buf, size, FreeArea, End);
+
+ if (Bufs && static_cast<const char*>(Bufs.back().Data) + Bufs.back().Size == buf) {
+ Bufs.back().Size += size;
+ } else {
+ Bufs.push_back({buf, size});
+ }
+
+ if (buf >= FreeArea && buf < End) {
+ Y_VERIFY_DEBUG(buf == FreeArea);
+ FreeArea = const_cast<char*>(static_cast<const char*>(buf)) + size;
+ Y_VERIFY_DEBUG(FreeArea <= End);
+ }
+ }
+
+ void Undo(size_t size) {
+ Y_VERIFY(Bufs);
+ auto& buf = Bufs.back();
+ Y_VERIFY(buf.Data == FreeArea - buf.Size);
+ buf.Size -= size;
+ if (!buf.Size) {
+ Bufs.pop_back();
+ }
+ FreeArea -= size;
+ DataSize -= size;
+ }
+
+ bool DropBufs(size_t& amount) {
+ while (BufferIndex != Bufs.size()) {
+ TConstIoVec& item = Bufs[BufferIndex];
+ // calculate number of bytes to the end in current buffer
+ const size_t remain = item.Size - FirstBufferOffset;
+ if (amount >= remain) {
+ // vector item completely fits into the received amount, drop it out and switch to next buffer
+ amount -= remain;
+ ++BufferIndex;
+ FirstBufferOffset = 0;
+ } else {
+ // adjust first buffer by "amount" bytes forward and reset amount to zero
+ FirstBufferOffset += amount;
+ amount = 0;
+ // return false meaning that we have some more data to send
+ return false;
+ }
+ }
+ return true;
+ }
+
+ void ResetBufs() {
+ BufferIndex = FirstBufferOffset = 0;
+ TriedWriting = false;
+ }
+
+ template <typename TVectorType>
+ void AppendToIoVector(TVectorType& vector, size_t max) {
+ for (size_t k = BufferIndex, offset = FirstBufferOffset; k != Bufs.size() && vector.size() < max; ++k, offset = 0) {
+ TConstIoVec v = Bufs[k];
+ v.Data = static_cast<const char*>(v.Data) + offset;
+ v.Size -= offset;
+ vector.push_back(v);
+ }
+ }
+
+ void Sign() {
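+ // v2 frames carry one checksum over header (with the field zeroed) and payload, skipped when encryption already protects the data;
+ // v1 frames carry a separate payload CRC plus a header CRC computed in TTcpPacketHeader_v1::Sign()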
+ if (Params.UseModernFrame) {
+ Packet.v2.Header.Checksum = 0;
+ Packet.v2.Header.PayloadLength = DataSize;
+ if (!Params.Encryption) {
+ ui32 sum = 0;
+ for (const auto& item : Bufs) {
+ sum = Crc32cExtendMSanCompatible(sum, item.Data, item.Size);
+ }
+ Packet.v2.Header.Checksum = sum;
+ }
+ } else {
+ Y_VERIFY(!Bufs.empty());
+ auto it = Bufs.begin();
+ static constexpr size_t headerLen = sizeof(TTcpPacketHeader_v1);
+ Y_VERIFY(it->Data == &Packet.v1.Header && it->Size >= headerLen);
+ ui32 sum = Crc32cExtendMSanCompatible(0, Packet.v1.Data, it->Size - headerLen);
+ while (++it != Bufs.end()) {
+ sum = Crc32cExtendMSanCompatible(sum, it->Data, it->Size);
+ }
+
+ Packet.v1.Header.PayloadCRC32 = sum;
+ Packet.v1.Header.DataSize = DataSize;
+ Packet.v1.Header.Sign();
+ }
+ }
+};
diff --git a/library/cpp/actors/interconnect/poller.h b/library/cpp/actors/interconnect/poller.h
new file mode 100644
index 0000000000..ff7979369f
--- /dev/null
+++ b/library/cpp/actors/interconnect/poller.h
@@ -0,0 +1,23 @@
+#pragma once
+
+#include <functional>
+#include <library/cpp/actors/core/events.h>
+
+namespace NActors {
+ class TSharedDescriptor: public TThrRefBase {
+ public:
+ virtual int GetDescriptor() = 0;
+ };
+
+ using TDelegate = std::function<void()>;
+ using TFDDelegate = std::function<TDelegate(const TIntrusivePtr<TSharedDescriptor>&)>;
+
+ class IPoller: public TThrRefBase {
+ public:
+ virtual ~IPoller() = default;
+
+ virtual void StartRead(const TIntrusivePtr<TSharedDescriptor>& s, TFDDelegate&& operation) = 0;
+ virtual void StartWrite(const TIntrusivePtr<TSharedDescriptor>& s, TFDDelegate&& operation) = 0;
+ };
+
+}
diff --git a/library/cpp/actors/interconnect/poller_actor.cpp b/library/cpp/actors/interconnect/poller_actor.cpp
new file mode 100644
index 0000000000..e75cbcaef4
--- /dev/null
+++ b/library/cpp/actors/interconnect/poller_actor.cpp
@@ -0,0 +1,294 @@
+#include "poller_actor.h"
+#include "interconnect_common.h"
+
+#include <library/cpp/actors/core/actor_bootstrapped.h>
+#include <library/cpp/actors/core/actorsystem.h>
+#include <library/cpp/actors/core/hfunc.h>
+#include <library/cpp/actors/core/log.h>
+#include <library/cpp/actors/core/probes.h>
+#include <library/cpp/actors/protos/services_common.pb.h>
+#include <library/cpp/actors/util/funnel_queue.h>
+
+#include <util/generic/intrlist.h>
+#include <util/system/thread.h>
+#include <util/system/event.h>
+#include <util/system/pipe.h>
+
+#include <variant>
+
+namespace NActors {
+
+ LWTRACE_USING(ACTORLIB_PROVIDER);
+
+ namespace {
+ int LastSocketError() {
+#if defined(_win_)
+ return WSAGetLastError();
+#else
+ return errno;
+#endif
+ }
+ }
+
+ struct TSocketRecord : TThrRefBase {
+ const TIntrusivePtr<TSharedDescriptor> Socket;
+ const TActorId ReadActorId;
+ const TActorId WriteActorId;
+ std::atomic_uint32_t Flags = 0;
+
+ TSocketRecord(TEvPollerRegister& ev)
+ : Socket(std::move(ev.Socket))
+ , ReadActorId(ev.ReadActorId)
+ , WriteActorId(ev.WriteActorId)
+ {}
+ };
+
+ template<typename TDerived>
+ class TPollerThreadBase : public ISimpleThread {
+ protected:
+ struct TPollerExitThread {}; // issued when we need to terminate the poller thread
+
+ struct TPollerWakeup {};
+
+ struct TPollerUnregisterSocket {
+ TIntrusivePtr<TSharedDescriptor> Socket;
+
+ TPollerUnregisterSocket(TIntrusivePtr<TSharedDescriptor> socket)
+ : Socket(std::move(socket))
+ {}
+ };
+
+ using TPollerSyncOperation = std::variant<TPollerExitThread, TPollerWakeup, TPollerUnregisterSocket>;
+
+ struct TPollerSyncOperationWrapper {
+ TPollerSyncOperation Operation;
+ TManualEvent Event;
+
+ TPollerSyncOperationWrapper(TPollerSyncOperation&& operation)
+ : Operation(std::move(operation))
+ {}
+
+ void Wait() {
+ Event.WaitI();
+ }
+
+ void SignalDone() {
+ Event.Signal();
+ }
+ };
+
+ TActorSystem *ActorSystem;
+ TPipeHandle ReadEnd, WriteEnd; // pipe for sync event processor
+ TFunnelQueue<TPollerSyncOperationWrapper*> SyncOperationsQ; // operation queue
+
+ public:
+ TPollerThreadBase(TActorSystem *actorSystem)
+ : ActorSystem(actorSystem)
+ {
+ // create a pipe for notifications
+ try {
+ TPipeHandle::Pipe(ReadEnd, WriteEnd, CloseOnExec);
+ } catch (const TFileError& err) {
+ Y_FAIL("failed to create pipe");
+ }
+
+ // switch the read/write ends to nonblocking mode
+ SetNonBlock(ReadEnd);
+ SetNonBlock(WriteEnd);
+ }
+
+ void UnregisterSocket(const TIntrusivePtr<TSocketRecord>& record) {
+ ExecuteSyncOperation(TPollerUnregisterSocket(record->Socket));
+ }
+
+ protected:
+ void Notify(TSocketRecord *record, bool read, bool write) {
+ auto issue = [&](const TActorId& recipient) {
+ ActorSystem->Send(new IEventHandle(recipient, {}, new TEvPollerReady(record->Socket, read, write)));
+ };
+ if (read && record->ReadActorId) {
+ issue(record->ReadActorId);
+ if (write && record->WriteActorId && record->WriteActorId != record->ReadActorId) {
+ issue(record->WriteActorId);
+ }
+ } else if (write && record->WriteActorId) {
+ issue(record->WriteActorId);
+ }
+ }
+
+ void Stop() {
+ // signal poller thread to stop and wait for the thread
+ ExecuteSyncOperation(TPollerExitThread());
+ ISimpleThread::Join();
+ }
+
+ void ExecuteSyncOperation(TPollerSyncOperation&& op) {
+ TPollerSyncOperationWrapper wrapper(std::move(op));
+ if (SyncOperationsQ.Push(&wrapper)) {
+ // this was the first entry, so we push notification through the pipe
+ for (;;) {
+ char buffer = '\x00';
+ ssize_t nwritten = WriteEnd.Write(&buffer, sizeof(buffer));
+ if (nwritten < 0) {
+ const int err = LastSocketError();
+ if (err == EINTR) {
+ continue;
+ } else {
+ Y_FAIL("WriteEnd.Write() failed with %s", strerror(err));
+ }
+ } else {
+ Y_VERIFY(nwritten);
+ break;
+ }
+ }
+ }
+ // wait for operation to complete
+ wrapper.Wait();
+ }
+
+ bool DrainReadEnd() {
+ size_t totalRead = 0;
+ char buffer[4096];
+ for (;;) {
+ ssize_t n = ReadEnd.Read(buffer, sizeof(buffer));
+ if (n < 0) {
+ const int error = LastSocketError();
+ if (error == EINTR) {
+ continue;
+ } else if (error == EAGAIN || error == EWOULDBLOCK) {
+ break;
+ } else {
+ Y_FAIL("read() failed with %s", strerror(errno));
+ }
+ } else {
+ Y_VERIFY(n);
+ totalRead += n;
+ }
+ }
+ return totalRead != 0;
+ }
+
+ bool ProcessSyncOpQueue() {
+ if (DrainReadEnd()) {
+ Y_VERIFY(!SyncOperationsQ.IsEmpty());
+ do {
+ TPollerSyncOperationWrapper *op = SyncOperationsQ.Top();
+ if (auto *unregister = std::get_if<TPollerUnregisterSocket>(&op->Operation)) {
+ static_cast<TDerived&>(*this).UnregisterSocketInLoop(unregister->Socket);
+ op->SignalDone();
+ } else if (std::get_if<TPollerExitThread>(&op->Operation)) {
+ op->SignalDone();
+ return false; // terminate the thread
+ } else if (std::get_if<TPollerWakeup>(&op->Operation)) {
+ op->SignalDone();
+ } else {
+ Y_FAIL();
+ }
+ } while (SyncOperationsQ.Pop());
+ }
+ return true;
+ }
+
+ void *ThreadProc() override {
+ SetCurrentThreadName("network poller");
+ while (ProcessSyncOpQueue()) {
+ static_cast<TDerived&>(*this).ProcessEventsInLoop();
+ }
+ return nullptr;
+ }
+ };
+
+} // namespace NActors
+
+#if defined(_linux_)
+# include "poller_actor_linux.h"
+#elif defined(_darwin_)
+# include "poller_actor_darwin.h"
+#elif defined(_win_)
+# include "poller_actor_win.h"
+#else
+# error "Unsupported platform"
+#endif
+
+namespace NActors {
+
+ class TPollerToken::TImpl {
+ std::weak_ptr<TPollerThread> Thread;
+ TIntrusivePtr<TSocketRecord> Record; // safe to use only while Thread.lock() succeeds
+
+ public:
+ TImpl(std::shared_ptr<TPollerThread> thread, TIntrusivePtr<TSocketRecord> record)
+ : Thread(thread)
+ , Record(std::move(record))
+ {
+ thread->RegisterSocket(Record);
+ }
+
+ ~TImpl() {
+ if (auto thread = Thread.lock()) {
+ thread->UnregisterSocket(Record);
+ }
+ }
+
+ void Request(bool read, bool write) {
+ if (auto thread = Thread.lock()) {
+ thread->Request(Record, read, write);
+ }
+ }
+
+ const TIntrusivePtr<TSharedDescriptor>& Socket() const {
+ return Record->Socket;
+ }
+ };
+
+ class TPollerActor: public TActorBootstrapped<TPollerActor> {
+ // poller thread
+ std::shared_ptr<TPollerThread> PollerThread;
+
+ public:
+ static constexpr IActor::EActivityType ActorActivityType() {
+ return IActor::INTERCONNECT_POLLER;
+ }
+
+ void Bootstrap() {
+ PollerThread = std::make_shared<TPollerThread>(TlsActivationContext->ExecutorThread.ActorSystem);
+ Become(&TPollerActor::StateFunc);
+ }
+
+ STRICT_STFUNC(StateFunc,
+ hFunc(TEvPollerRegister, Handle);
+ cFunc(TEvents::TSystem::Poison, PassAway);
+ )
+
+ void Handle(TEvPollerRegister::TPtr& ev) {
+ auto *msg = ev->Get();
+ auto impl = std::make_unique<TPollerToken::TImpl>(PollerThread, MakeIntrusive<TSocketRecord>(*msg));
+ auto socket = impl->Socket();
+ TPollerToken::TPtr token(new TPollerToken(std::move(impl)));
+ if (msg->ReadActorId && msg->WriteActorId && msg->WriteActorId != msg->ReadActorId) {
+ Send(msg->ReadActorId, new TEvPollerRegisterResult(socket, token));
+ Send(msg->WriteActorId, new TEvPollerRegisterResult(socket, std::move(token)));
+ } else if (msg->ReadActorId) {
+ Send(msg->ReadActorId, new TEvPollerRegisterResult(socket, std::move(token)));
+ } else if (msg->WriteActorId) {
+ Send(msg->WriteActorId, new TEvPollerRegisterResult(socket, std::move(token)));
+ }
+ }
+ };
+
+ TPollerToken::TPollerToken(std::unique_ptr<TImpl> impl)
+ : Impl(std::move(impl))
+ {}
+
+ TPollerToken::~TPollerToken()
+ {}
+
+ void TPollerToken::Request(bool read, bool write) {
+ Impl->Request(read, write);
+ }
+
+ IActor* CreatePollerActor() {
+ return new TPollerActor;
+ }
+
+}
diff --git a/library/cpp/actors/interconnect/poller_actor.h b/library/cpp/actors/interconnect/poller_actor.h
new file mode 100644
index 0000000000..f927b82089
--- /dev/null
+++ b/library/cpp/actors/interconnect/poller_actor.h
@@ -0,0 +1,63 @@
+#pragma once
+
+#include "events_local.h"
+#include "poller.h"
+#include <library/cpp/actors/core/actor.h>
+
+namespace NActors {
+ struct TEvPollerRegister : TEventLocal<TEvPollerRegister, ui32(ENetwork::EvPollerRegister)> {
+ const TIntrusivePtr<TSharedDescriptor> Socket; // socket being watched
+ const TActorId ReadActorId; // actor id to notify about read availability
+ const TActorId WriteActorId; // actor id to notify about write availability; may be the same as the ReadActorId
+
+ TEvPollerRegister(TIntrusivePtr<TSharedDescriptor> socket, const TActorId& readActorId, const TActorId& writeActorId)
+ : Socket(std::move(socket))
+ , ReadActorId(readActorId)
+ , WriteActorId(writeActorId)
+ {}
+ };
+
+ // poller token is sent in response to TEvPollerRegister; it allows requesting poll when read/write returns EAGAIN
+ class TPollerToken : public TThrRefBase {
+ class TImpl;
+ std::unique_ptr<TImpl> Impl;
+
+ friend class TPollerActor;
+ TPollerToken(std::unique_ptr<TImpl> impl);
+
+ public:
+ ~TPollerToken();
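+ // typical flow (sketch): when a socket operation returns EAGAIN, call Request(true /*read*/, false /*write*/)
+ // and resume the operation only after TEvPollerReady arrives for that socket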
+ void Request(bool read, bool write);
+
+ using TPtr = TIntrusivePtr<TPollerToken>;
+ };
+
+ struct TEvPollerRegisterResult : TEventLocal<TEvPollerRegisterResult, ui32(ENetwork::EvPollerRegisterResult)> {
+ TIntrusivePtr<TSharedDescriptor> Socket;
+ TPollerToken::TPtr PollerToken;
+
+ TEvPollerRegisterResult(TIntrusivePtr<TSharedDescriptor> socket, TPollerToken::TPtr pollerToken)
+ : Socket(std::move(socket))
+ , PollerToken(std::move(pollerToken))
+ {}
+ };
+
+ struct TEvPollerReady : TEventLocal<TEvPollerReady, ui32(ENetwork::EvPollerReady)> {
+ TIntrusivePtr<TSharedDescriptor> Socket;
+ const bool Read, Write;
+
+ TEvPollerReady(TIntrusivePtr<TSharedDescriptor> socket, bool read, bool write)
+ : Socket(std::move(socket))
+ , Read(read)
+ , Write(write)
+ {}
+ };
+
+ IActor* CreatePollerActor();
+
+ inline TActorId MakePollerActorId() {
+ char x[12] = {'I', 'C', 'P', 'o', 'l', 'l', 'e', 'r', '\xDE', '\xAD', '\xBE', '\xEF'};
+ return TActorId(0, TStringBuf(std::begin(x), std::end(x)));
+ }
+
+}
diff --git a/library/cpp/actors/interconnect/poller_actor_darwin.h b/library/cpp/actors/interconnect/poller_actor_darwin.h
new file mode 100644
index 0000000000..4cb0a58f8d
--- /dev/null
+++ b/library/cpp/actors/interconnect/poller_actor_darwin.h
@@ -0,0 +1,95 @@
+#pragma once
+
+#include <sys/event.h>
+
+namespace NActors {
+
+ class TKqueueThread : public TPollerThreadBase<TKqueueThread> {
+ // KQueue file descriptor
+ int KqDescriptor;
+
+ void SafeKevent(const struct kevent* ev, int size) {
+ int rc;
+ do {
+ rc = kevent(KqDescriptor, ev, size, nullptr, 0, nullptr);
+ } while (rc == -1 && errno == EINTR);
+ Y_VERIFY(rc != -1, "kevent() failed with %s", strerror(errno));
+ }
+
+ public:
+ TKqueueThread(TActorSystem *actorSystem)
+ : TPollerThreadBase(actorSystem)
+ {
+ // create kqueue
+ KqDescriptor = kqueue();
+ Y_VERIFY(KqDescriptor != -1, "kqueue() failed with %s", strerror(errno));
+
+ // set close-on-exit flag
+ {
+ int flags = fcntl(KqDescriptor, F_GETFD);
+ Y_VERIFY(flags >= 0, "fcntl(F_GETFD) failed with %s", strerror(errno));
+ int rc = fcntl(KqDescriptor, F_SETFD, flags | FD_CLOEXEC);
+ Y_VERIFY(rc != -1, "fcntl(F_SETFD, +FD_CLOEXEC) failed with %s", strerror(errno));
+ }
+
+ // register pipe's read end in poller
+ struct kevent ev;
+ EV_SET(&ev, (int)ReadEnd, EVFILT_READ, EV_ADD | EV_ENABLE, 0, 0, nullptr);
+ SafeKevent(&ev, 1);
+
+ ISimpleThread::Start(); // start poller thread
+ }
+
+ ~TKqueueThread() {
+ Stop();
+ close(KqDescriptor);
+ }
+
+ void ProcessEventsInLoop() {
+ std::array<struct kevent, 256> events;
+
+ int numReady = kevent(KqDescriptor, nullptr, 0, events.data(), events.size(), nullptr);
+ if (numReady == -1) {
+ if (errno == EINTR) {
+ return;
+ } else {
+ Y_FAIL("kevent() failed with %s", strerror(errno));
+ }
+ }
+
+ for (int i = 0; i < numReady; ++i) {
+ const struct kevent& ev = events[i];
+ if (ev.udata) {
+ TSocketRecord *it = static_cast<TSocketRecord*>(ev.udata);
+ const bool error = ev.flags & (EV_EOF | EV_ERROR);
+ const bool read = error || ev.filter == EVFILT_READ;
+ const bool write = error || ev.filter == EVFILT_WRITE;
+ Notify(it, read, write);
+ }
+ }
+ }
+
+ void UnregisterSocketInLoop(const TIntrusivePtr<TSharedDescriptor>& socket) {
+ struct kevent ev[2];
+ const int fd = socket->GetDescriptor();
+ EV_SET(&ev[0], fd, EVFILT_READ, EV_DELETE, 0, 0, nullptr);
+ EV_SET(&ev[1], fd, EVFILT_WRITE, EV_DELETE, 0, 0, nullptr);
+ SafeKevent(ev, 2);
+ }
+
+ void RegisterSocket(const TIntrusivePtr<TSocketRecord>& record) {
+ int flags = EV_ADD | EV_CLEAR | EV_ENABLE;
+ struct kevent ev[2];
+ const int fd = record->Socket->GetDescriptor();
+ EV_SET(&ev[0], fd, EVFILT_READ, flags, 0, 0, record.Get());
+ EV_SET(&ev[1], fd, EVFILT_WRITE, flags, 0, 0, record.Get());
+ SafeKevent(ev, 2);
+ }
+
+ void Request(const TIntrusivePtr<TSocketRecord>& /*socket*/, bool /*read*/, bool /*write*/)
+ {} // no special processing here as we use kqueue in edge-triggered mode
+ };
+
+ using TPollerThread = TKqueueThread;
+
+}
diff --git a/library/cpp/actors/interconnect/poller_actor_linux.h b/library/cpp/actors/interconnect/poller_actor_linux.h
new file mode 100644
index 0000000000..dd4f7c0124
--- /dev/null
+++ b/library/cpp/actors/interconnect/poller_actor_linux.h
@@ -0,0 +1,114 @@
+#pragma once
+
+#include <sys/epoll.h>
+
+namespace NActors {
+
+ class TEpollThread : public TPollerThreadBase<TEpollThread> {
+ // epoll file descriptor
+ int EpollDescriptor;
+
+ public:
+ TEpollThread(TActorSystem *actorSystem)
+ : TPollerThreadBase(actorSystem)
+ {
+ EpollDescriptor = epoll_create1(EPOLL_CLOEXEC);
+ Y_VERIFY(EpollDescriptor != -1, "epoll_create1() failed with %s", strerror(errno));
+
+ epoll_event event;
+ event.data.ptr = nullptr;
+ event.events = EPOLLIN;
+ if (epoll_ctl(EpollDescriptor, EPOLL_CTL_ADD, ReadEnd, &event) == -1) {
+ Y_FAIL("epoll_ctl(EPOLL_CTL_ADD) failed with %s", strerror(errno));
+ }
+
+ ISimpleThread::Start(); // start poller thread
+ }
+
+ ~TEpollThread() {
+ Stop();
+ close(EpollDescriptor);
+ }
+
+ void ProcessEventsInLoop() {
+ // preallocated array for events
+ std::array<epoll_event, 256> events;
+
+ // wait indefinitely for event to arrive
+ LWPROBE(EpollStartWaitIn);
+ int numReady = epoll_wait(EpollDescriptor, events.data(), events.size(), -1);
+ LWPROBE(EpollFinishWaitIn, numReady);
+
+ // check return status for any errors
+ if (numReady == -1) {
+ if (errno == EINTR) {
+ return; // restart the call a bit later
+ } else {
+ Y_FAIL("epoll_wait() failed with %s", strerror(errno));
+ }
+ }
+
+ for (int i = 0; i < numReady; ++i) {
+ const epoll_event& ev = events[i];
+ if (auto *record = static_cast<TSocketRecord*>(ev.data.ptr)) {
+ const bool read = ev.events & (EPOLLIN | EPOLLHUP | EPOLLRDHUP | EPOLLERR);
+ const bool write = ev.events & (EPOLLOUT | EPOLLERR);
+
+ // remove hit flags from the bit set
+ ui32 flags = record->Flags;
+ const ui32 remove = (read ? EPOLLIN : 0) | (write ? EPOLLOUT : 0);
+ while (!record->Flags.compare_exchange_weak(flags, flags & ~remove))
+ {}
+ flags &= ~remove;
+
+ // rearm poller if some flags remain
+ if (flags) {
+ epoll_event event;
+ event.events = EPOLLONESHOT | EPOLLRDHUP | flags;
+ event.data.ptr = record;
+ if (epoll_ctl(EpollDescriptor, EPOLL_CTL_MOD, record->Socket->GetDescriptor(), &event) == -1) {
+ Y_FAIL("epoll_ctl(EPOLL_CTL_MOD) failed with %s", strerror(errno));
+ }
+ }
+
+ // issue notifications
+ Notify(record, read, write);
+ }
+ }
+ }
+
+ void UnregisterSocketInLoop(const TIntrusivePtr<TSharedDescriptor>& socket) {
+ if (epoll_ctl(EpollDescriptor, EPOLL_CTL_DEL, socket->GetDescriptor(), nullptr) == -1) {
+ Y_FAIL("epoll_ctl(EPOLL_CTL_DEL) failed with %s", strerror(errno));
+ }
+ }
+
+ void RegisterSocket(const TIntrusivePtr<TSocketRecord>& record) {
+ epoll_event event;
+ event.events = EPOLLONESHOT | EPOLLRDHUP;
+ event.data.ptr = record.Get();
+ if (epoll_ctl(EpollDescriptor, EPOLL_CTL_ADD, record->Socket->GetDescriptor(), &event) == -1) {
+ Y_FAIL("epoll_ctl(EPOLL_CTL_ADD) failed with %s", strerror(errno));
+ }
+ }
+
+ void Request(const TIntrusivePtr<TSocketRecord>& record, bool read, bool write) {
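+ // accumulate the requested events in Flags, then rearm the one-shot epoll registration with the merged set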
+ const ui32 add = (read ? EPOLLIN : 0) | (write ? EPOLLOUT : 0);
+ ui32 flags = record->Flags;
+ while (!record->Flags.compare_exchange_weak(flags, flags | add))
+ {}
+ flags |= add;
+ if (flags) {
+ epoll_event event;
+ event.events = EPOLLONESHOT | EPOLLRDHUP | flags;
+ event.data.ptr = record.Get();
+ if (epoll_ctl(EpollDescriptor, EPOLL_CTL_MOD, record->Socket->GetDescriptor(), &event) == -1) {
+ Y_FAIL("epoll_ctl(EPOLL_CTL_MOD) failed with %s", strerror(errno));
+ }
+ }
+ }
+ };
+
+ using TPollerThread = TEpollThread;
+
+} // namespace NActors
diff --git a/library/cpp/actors/interconnect/poller_actor_win.h b/library/cpp/actors/interconnect/poller_actor_win.h
new file mode 100644
index 0000000000..4b4caa0ebd
--- /dev/null
+++ b/library/cpp/actors/interconnect/poller_actor_win.h
@@ -0,0 +1,103 @@
+#pragma once
+
+namespace NActors {
+
+ class TSelectThread : public TPollerThreadBase<TSelectThread> {
+ TMutex Mutex;
+ std::unordered_map<SOCKET, TIntrusivePtr<TSocketRecord>> Descriptors;
+
+ enum {
+ READ = 1,
+ WRITE = 2,
+ };
+
+ public:
+ TSelectThread(TActorSystem *actorSystem)
+ : TPollerThreadBase(actorSystem)
+ {
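+ // poll the wakeup pipe's read end as well; the null record marks it as the control channel, not a user socket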
+ Descriptors.emplace(ReadEnd, nullptr);
+ ISimpleThread::Start();
+ }
+
+ ~TSelectThread() {
+ Stop();
+ }
+
+ void ProcessEventsInLoop() {
+ fd_set readfds, writefds, exceptfds;
+
+ FD_ZERO(&readfds);
+ FD_ZERO(&writefds);
+ FD_ZERO(&exceptfds);
+ int nfds = 0;
+ with_lock (Mutex) {
+ for (const auto& [key, record] : Descriptors) {
+ const int fd = key;
+ auto add = [&](auto& set) {
+ FD_SET(fd, &set);
+ nfds = Max<int>(nfds, fd + 1);
+ };
+ if (!record || (record->Flags & READ)) {
+ add(readfds);
+ }
+ if (!record || (record->Flags & WRITE)) {
+ add(writefds);
+ }
+ add(exceptfds);
+ }
+ }
+
+ int res = select(nfds, &readfds, &writefds, &exceptfds, nullptr);
+ if (res == -1) {
+ const int err = LastSocketError();
+ if (err == EINTR) {
+ return; // try a bit later
+ } else {
+ Y_FAIL("select() failed with %s", strerror(err));
+ }
+ }
+
+ with_lock (Mutex) {
+ for (const auto& [fd, record] : Descriptors) {
+ if (record) {
+ const bool error = FD_ISSET(fd, &exceptfds);
+ const bool read = error || FD_ISSET(fd, &readfds);
+ const bool write = error || FD_ISSET(fd, &writefds);
+ if (read) {
+ record->Flags &= ~READ;
+ }
+ if (write) {
+ record->Flags &= ~WRITE;
+ }
+ Notify(record.Get(), read, write);
+ }
+ }
+ }
+ }
+
+ void UnregisterSocketInLoop(const TIntrusivePtr<TSharedDescriptor>& socket) {
+ with_lock (Mutex) {
+ Descriptors.erase(socket->GetDescriptor());
+ }
+ }
+
+ void RegisterSocket(const TIntrusivePtr<TSocketRecord>& record) {
+ with_lock (Mutex) {
+ Descriptors.emplace(record->Socket->GetDescriptor(), record);
+ }
+ ExecuteSyncOperation(TPollerWakeup());
+ }
+
+ void Request(const TIntrusivePtr<TSocketRecord>& record, bool read, bool write) {
+ with_lock (Mutex) {
+ const auto it = Descriptors.find(record->Socket->GetDescriptor());
+ Y_VERIFY(it != Descriptors.end());
+ it->second->Flags |= (read ? READ : 0) | (write ? WRITE : 0);
+ }
+ ExecuteSyncOperation(TPollerWakeup());
+ }
+ };
+
+ using TPollerThread = TSelectThread;
+
+} // NActors
diff --git a/library/cpp/actors/interconnect/poller_tcp.cpp b/library/cpp/actors/interconnect/poller_tcp.cpp
new file mode 100644
index 0000000000..8267df31ea
--- /dev/null
+++ b/library/cpp/actors/interconnect/poller_tcp.cpp
@@ -0,0 +1,35 @@
+#include "poller_tcp.h"
+
+namespace NInterconnect {
+ TPollerThreads::TPollerThreads(size_t units, bool useSelect)
+ : Units(units)
+ {
+ Y_VERIFY_DEBUG(!Units.empty());
+ for (auto& unit : Units)
+ unit = TPollerUnit::Make(useSelect);
+ }
+
+ TPollerThreads::~TPollerThreads() {
+ }
+
+ void TPollerThreads::Start() {
+ for (const auto& unit : Units)
+ unit->Start();
+ }
+
+ void TPollerThreads::Stop() {
+ for (const auto& unit : Units)
+ unit->Stop();
+ }
+
+ void TPollerThreads::StartRead(const TIntrusivePtr<TSharedDescriptor>& s, TFDDelegate&& operation) {
+ auto& unit = Units[THash<SOCKET>()(s->GetDescriptor()) % Units.size()];
+ unit->StartReadOperation(s, std::move(operation));
+ }
+
+ void TPollerThreads::StartWrite(const TIntrusivePtr<TSharedDescriptor>& s, TFDDelegate&& operation) {
+ auto& unit = Units[THash<SOCKET>()(s->GetDescriptor()) % Units.size()];
+ unit->StartWriteOperation(s, std::move(operation));
+ }
+
+}
diff --git a/library/cpp/actors/interconnect/poller_tcp.h b/library/cpp/actors/interconnect/poller_tcp.h
new file mode 100644
index 0000000000..310265eccd
--- /dev/null
+++ b/library/cpp/actors/interconnect/poller_tcp.h
@@ -0,0 +1,25 @@
+#pragma once
+
+#include "poller_tcp_unit.h"
+#include "poller.h"
+
+#include <util/generic/vector.h>
+#include <util/generic/hash.h>
+
+namespace NInterconnect {
+ class TPollerThreads: public NActors::IPoller {
+ public:
+ TPollerThreads(size_t units = 1U, bool useSelect = false);
+ ~TPollerThreads();
+
+ void Start();
+ void Stop();
+
+ void StartRead(const TIntrusivePtr<TSharedDescriptor>& s, TFDDelegate&& operation) override;
+ void StartWrite(const TIntrusivePtr<TSharedDescriptor>& s, TFDDelegate&& operation) override;
+
+ private:
+ TVector<TPollerUnit::TPtr> Units;
+ };
+
+}
diff --git a/library/cpp/actors/interconnect/poller_tcp_unit.cpp b/library/cpp/actors/interconnect/poller_tcp_unit.cpp
new file mode 100644
index 0000000000..59e7dda810
--- /dev/null
+++ b/library/cpp/actors/interconnect/poller_tcp_unit.cpp
@@ -0,0 +1,126 @@
+#include "poller_tcp_unit.h"
+
+#if !defined(_win_) && !defined(_darwin_)
+#include "poller_tcp_unit_epoll.h"
+#endif
+
+#include "poller_tcp_unit_select.h"
+#include "poller.h"
+
+#include <library/cpp/actors/prof/tag.h>
+#include <library/cpp/actors/util/intrinsics.h>
+
+#if defined _linux_
+#include <pthread.h>
+#endif
+
+namespace NInterconnect {
+ TPollerUnit::TPtr
+ TPollerUnit::Make(bool useSelect) {
+#if defined(_win_) || defined(_darwin_)
+ Y_UNUSED(useSelect);
+ return TPtr(new TPollerUnitSelect);
+#else
+ return useSelect ? TPtr(new TPollerUnitSelect) : TPtr(new TPollerUnitEpoll);
+#endif
+ }
+
+ TPollerUnit::TPollerUnit()
+ : StopFlag(true)
+ , ReadLoop(TThread::TParams(IdleThread<false>, this).SetName("network read"))
+ , WriteLoop(TThread::TParams(IdleThread<true>, this).SetName("network write"))
+ {
+ }
+
+ TPollerUnit::~TPollerUnit() {
+ if (!AtomicLoad(&StopFlag))
+ Stop();
+ }
+
+ void
+ TPollerUnit::Start() {
+ AtomicStore(&StopFlag, false);
+ ReadLoop.Start();
+ WriteLoop.Start();
+ }
+
+ void
+ TPollerUnit::Stop() {
+ AtomicStore(&StopFlag, true);
+ ReadLoop.Join();
+ WriteLoop.Join();
+ }
+
+ template <>
+ TPollerUnit::TSide&
+ TPollerUnit::GetSide<false>() {
+ return Read;
+ }
+
+ template <>
+ TPollerUnit::TSide&
+ TPollerUnit::GetSide<true>() {
+ return Write;
+ }
+
+ void
+ TPollerUnit::StartReadOperation(
+ const TIntrusivePtr<TSharedDescriptor>& stream,
+ TFDDelegate&& operation) {
+ Y_VERIFY_DEBUG(stream);
+ if (AtomicLoad(&StopFlag))
+ return;
+ GetSide<false>().InputQueue.Push(TSide::TItem(stream, std::move(operation)));
+ }
+
+ void
+ TPollerUnit::StartWriteOperation(
+ const TIntrusivePtr<TSharedDescriptor>& stream,
+ TFDDelegate&& operation) {
+ Y_VERIFY_DEBUG(stream);
+ if (AtomicLoad(&StopFlag))
+ return;
+ GetSide<true>().InputQueue.Push(TSide::TItem(stream, std::move(operation)));
+ }
+
+ template <bool IsWrite>
+ void*
+ TPollerUnit::IdleThread(void* param) {
+ // TODO: musl-libc version of `sched_param` struct is for some reason different from pthread
+ // version in Ubuntu 12.04
+#if defined(_linux_) && !defined(_musl_)
+ pthread_t threadSelf = pthread_self();
+ sched_param sparam = {20};
+ pthread_setschedparam(threadSelf, SCHED_FIFO, &sparam);
+#endif
+
+ static_cast<TPollerUnit*>(param)->RunLoop<IsWrite>();
+ return nullptr;
+ }
+
+ template <>
+ void
+ TPollerUnit::RunLoop<false>() {
+ NProfiling::TMemoryTagScope tag("INTERCONNECT_RECEIVED_DATA");
+ while (!AtomicLoad(&StopFlag))
+ ProcessRead();
+ }
+
+ template <>
+ void
+ TPollerUnit::RunLoop<true>() {
+ NProfiling::TMemoryTagScope tag("INTERCONNECT_SEND_DATA");
+ while (!AtomicLoad(&StopFlag))
+ ProcessWrite();
+ }
+
+ void
+ TPollerUnit::TSide::ProcessInput() {
+ if (!InputQueue.IsEmpty())
+ do {
+ auto sock = InputQueue.Top().first->GetDescriptor();
+ if (!Operations.emplace(sock, std::move(InputQueue.Top())).second)
+ Y_FAIL("Descriptor is already in pooler.");
+ } while (InputQueue.Pop());
+ }
+}
diff --git a/library/cpp/actors/interconnect/poller_tcp_unit.h b/library/cpp/actors/interconnect/poller_tcp_unit.h
new file mode 100644
index 0000000000..692168b968
--- /dev/null
+++ b/library/cpp/actors/interconnect/poller_tcp_unit.h
@@ -0,0 +1,67 @@
+#pragma once
+
+#include <util/system/thread.h>
+#include <library/cpp/actors/util/funnel_queue.h>
+
+#include "interconnect_stream.h"
+
+#include <memory>
+#include <functional>
+#include <unordered_map>
+
+namespace NInterconnect {
+ using NActors::TFDDelegate;
+ using NActors::TSharedDescriptor;
+
+ class TPollerUnit {
+ public:
+ typedef std::unique_ptr<TPollerUnit> TPtr;
+
+ static TPtr Make(bool useSelect);
+
+ void Start();
+ void Stop();
+
+ virtual void StartReadOperation(
+ const TIntrusivePtr<TSharedDescriptor>& stream,
+ TFDDelegate&& operation);
+
+ virtual void StartWriteOperation(
+ const TIntrusivePtr<TSharedDescriptor>& stream,
+ TFDDelegate&& operation);
+
+ virtual ~TPollerUnit();
+
+ private:
+ virtual void ProcessRead() = 0;
+ virtual void ProcessWrite() = 0;
+
+ template <bool IsWrite>
+ static void* IdleThread(void* param);
+
+ template <bool IsWrite>
+ void RunLoop();
+
+ volatile bool StopFlag;
+ TThread ReadLoop, WriteLoop;
+
+ protected:
+ TPollerUnit();
+
+ struct TSide {
+ using TOperations =
+ std::unordered_map<SOCKET,
+ std::pair<TIntrusivePtr<TSharedDescriptor>, TFDDelegate>>;
+
+ TOperations Operations;
+ using TItem = TOperations::mapped_type;
+ TFunnelQueue<TItem> InputQueue;
+
+ void ProcessInput();
+ } Read, Write;
+
+ template <bool IsWrite>
+ TSide& GetSide();
+ };
+
+}
diff --git a/library/cpp/actors/interconnect/poller_tcp_unit_epoll.cpp b/library/cpp/actors/interconnect/poller_tcp_unit_epoll.cpp
new file mode 100644
index 0000000000..c78538b95b
--- /dev/null
+++ b/library/cpp/actors/interconnect/poller_tcp_unit_epoll.cpp
@@ -0,0 +1,125 @@
+#include "poller_tcp_unit_epoll.h"
+#if !defined(_win_) && !defined(_darwin_)
+#include <unistd.h>
+#include <sys/epoll.h>
+
+#include <csignal>
+#include <cerrno>
+#include <cstring>
+
+namespace NInterconnect {
+ namespace {
+ void
+ DeleteEpoll(int epoll, SOCKET stream) {
+ ::epoll_event event = {0, {.fd = stream}};
+ if (::epoll_ctl(epoll, EPOLL_CTL_DEL, stream, &event)) {
+ Cerr << "epoll_ctl errno: " << errno << Endl;
+ Y_FAIL("epoll delete error!");
+ }
+ }
+
+ template <ui32 Events>
+ void
+ AddEpoll(int epoll, SOCKET stream) {
+ ::epoll_event event = {.events = Events};
+ event.data.fd = stream;
+ if (::epoll_ctl(epoll, EPOLL_CTL_ADD, stream, &event)) {
+ Cerr << "epoll_ctl errno: " << errno << Endl;
+ Y_FAIL("epoll add error!");
+ }
+ }
+
+ int
+ Initialize() {
+ const auto epoll = ::epoll_create(10000);
+ Y_VERIFY_DEBUG(epoll > 0);
+ return epoll;
+ }
+
+ }
+
+ TPollerUnitEpoll::TPollerUnitEpoll()
+ : ReadDescriptor(Initialize())
+ , WriteDescriptor(Initialize())
+ {
+ // Signals to keep blocked while waiting in epoll_pwait.
+ ::sigemptyset(&sigmask);
+ ::sigaddset(&sigmask, SIGPIPE);
+ ::sigaddset(&sigmask, SIGTERM);
+ }
+
+ TPollerUnitEpoll::~TPollerUnitEpoll() {
+ ::close(ReadDescriptor);
+ ::close(WriteDescriptor);
+ }
+
+ template <>
+ int TPollerUnitEpoll::GetDescriptor<false>() const {
+ return ReadDescriptor;
+ }
+
+ template <>
+ int TPollerUnitEpoll::GetDescriptor<true>() const {
+ return WriteDescriptor;
+ }
+
+ void
+ TPollerUnitEpoll::StartReadOperation(
+ const TIntrusivePtr<TSharedDescriptor>& s,
+ TFDDelegate&& operation) {
+ TPollerUnit::StartReadOperation(s, std::move(operation));
+ AddEpoll<EPOLLRDHUP | EPOLLIN>(ReadDescriptor, s->GetDescriptor());
+ }
+
+ void
+ TPollerUnitEpoll::StartWriteOperation(
+ const TIntrusivePtr<TSharedDescriptor>& s,
+ TFDDelegate&& operation) {
+ TPollerUnit::StartWriteOperation(s, std::move(operation));
+ AddEpoll<EPOLLRDHUP | EPOLLOUT>(WriteDescriptor, s->GetDescriptor());
+ }
+
+ constexpr int EVENTS_BUF_SIZE = 128;
+
+ template <bool WriteOp>
+ void
+ TPollerUnitEpoll::Process() {
+ ::epoll_event events[EVENTS_BUF_SIZE];
+
+ const int epoll = GetDescriptor<WriteOp>();
+
+ /* Short timeout so the loop can periodically recheck StopFlag */
+ const int result =
+ ::epoll_pwait(epoll, events, EVENTS_BUF_SIZE, 200, &sigmask);
+
+ if (result == -1 && errno != EINTR)
+ Y_FAIL("epoll wait error!");
+
+ auto& side = GetSide<WriteOp>();
+ side.ProcessInput();
+
+ for (int i = 0; i < result; ++i) {
+ const auto it = side.Operations.find(events[i].data.fd);
+ if (side.Operations.end() == it)
+ continue;
+ if (const auto& finalizer = it->second.second(it->second.first)) {
+ DeleteEpoll(epoll, it->first);
+ side.Operations.erase(it);
+ finalizer();
+ }
+ }
+ }
+
+ void
+ TPollerUnitEpoll::ProcessRead() {
+ Process<false>();
+ }
+
+ void
+ TPollerUnitEpoll::ProcessWrite() {
+ Process<true>();
+ }
+
+}
+
+#endif
diff --git a/library/cpp/actors/interconnect/poller_tcp_unit_epoll.h b/library/cpp/actors/interconnect/poller_tcp_unit_epoll.h
new file mode 100644
index 0000000000..ff7893eba2
--- /dev/null
+++ b/library/cpp/actors/interconnect/poller_tcp_unit_epoll.h
@@ -0,0 +1,33 @@
+#pragma once
+
+#include "poller_tcp_unit.h"
+
+namespace NInterconnect {
+ class TPollerUnitEpoll: public TPollerUnit {
+ public:
+ TPollerUnitEpoll();
+ virtual ~TPollerUnitEpoll();
+
+ private:
+ virtual void StartReadOperation(
+ const TIntrusivePtr<TSharedDescriptor>& s,
+ TFDDelegate&& operation) override;
+
+ virtual void StartWriteOperation(
+ const TIntrusivePtr<TSharedDescriptor>& s,
+ TFDDelegate&& operation) override;
+
+ virtual void ProcessRead() override;
+ virtual void ProcessWrite() override;
+
+ template <bool Write>
+ void Process();
+
+ template <bool Write>
+ int GetDescriptor() const;
+
+ const int ReadDescriptor, WriteDescriptor;
+ ::sigset_t sigmask;
+ };
+
+}
diff --git a/library/cpp/actors/interconnect/poller_tcp_unit_select.cpp b/library/cpp/actors/interconnect/poller_tcp_unit_select.cpp
new file mode 100644
index 0000000000..ae7aaad566
--- /dev/null
+++ b/library/cpp/actors/interconnect/poller_tcp_unit_select.cpp
@@ -0,0 +1,86 @@
+#include "poller_tcp_unit_select.h"
+
+#include <csignal>
+
+#if defined(_win_)
+#include <winsock2.h>
+#define SOCKET_ERROR_SOURCE ::WSAGetLastError()
+#elif defined(_darwin_)
+#include <cerrno>
+#define SOCKET_ERROR_SOURCE errno
+typedef timeval TIMEVAL;
+#else
+#include <cerrno>
+#define SOCKET_ERROR_SOURCE errno
+#endif
+
+namespace NInterconnect {
+ TPollerUnitSelect::TPollerUnitSelect() {
+ }
+
+ TPollerUnitSelect::~TPollerUnitSelect() {
+ }
+
+ template <bool IsWrite>
+ void
+ TPollerUnitSelect::Process() {
+ auto& side = GetSide<IsWrite>();
+ side.ProcessInput();
+
+ enum : size_t { R, W, E };
+ static const auto O = IsWrite ? W : R;
+
+ ::fd_set sets[3];
+
+ FD_ZERO(&sets[R]);
+ FD_ZERO(&sets[W]);
+ FD_ZERO(&sets[E]);
+
+ for (const auto& operation : side.Operations) {
+ FD_SET(operation.first, &sets[O]);
+ FD_SET(operation.first, &sets[E]);
+ }
+
+#if defined(_win_)
+ ::TIMEVAL timeout = {0L, 99991L};
+ const auto numberEvents = !side.Operations.empty() ? ::select(FD_SETSIZE, &sets[R], &sets[W], &sets[E], &timeout)
+ : (::Sleep(100), 0);
+#elif defined(_darwin_)
+ ::TIMEVAL timeout = {0L, 99991L};
+ const auto numberEvents = ::select(FD_SETSIZE, &sets[R], &sets[W], &sets[E], &timeout);
+#else
+ ::sigset_t sigmask;
+ ::sigemptyset(&sigmask);
+ ::sigaddset(&sigmask, SIGPIPE);
+ ::sigaddset(&sigmask, SIGTERM);
+
+ struct ::timespec timeout = {0L, 99999989L};
+ const auto numberEvents = ::pselect(FD_SETSIZE, &sets[R], &sets[W], &sets[E], &timeout, &sigmask);
+#endif
+
+ Y_VERIFY_DEBUG(numberEvents >= 0);
+
+ for (auto it = side.Operations.cbegin(); side.Operations.cend() != it;) {
+ if (FD_ISSET(it->first, &sets[O]) || FD_ISSET(it->first, &sets[E]))
+ if (const auto& finalizer = it->second.second(it->second.first)) {
+ side.Operations.erase(it++);
+ finalizer();
+ continue;
+ }
+ ++it;
+ }
+ }
+
+ void
+ TPollerUnitSelect::ProcessRead() {
+ Process<false>();
+ }
+
+ void
+ TPollerUnitSelect::ProcessWrite() {
+ Process<true>();
+ }
+
+}
diff --git a/library/cpp/actors/interconnect/poller_tcp_unit_select.h b/library/cpp/actors/interconnect/poller_tcp_unit_select.h
new file mode 100644
index 0000000000..0c15217796
--- /dev/null
+++ b/library/cpp/actors/interconnect/poller_tcp_unit_select.h
@@ -0,0 +1,19 @@
+#pragma once
+
+#include "poller_tcp_unit.h"
+
+namespace NInterconnect {
+ class TPollerUnitSelect: public TPollerUnit {
+ public:
+ TPollerUnitSelect();
+ virtual ~TPollerUnitSelect();
+
+ private:
+ virtual void ProcessRead() override;
+ virtual void ProcessWrite() override;
+
+ template <bool IsWrite>
+ void Process();
+ };
+
+}
diff --git a/library/cpp/actors/interconnect/profiler.h b/library/cpp/actors/interconnect/profiler.h
new file mode 100644
index 0000000000..77a59e3179
--- /dev/null
+++ b/library/cpp/actors/interconnect/profiler.h
@@ -0,0 +1,142 @@
+#pragma once
+
+#include <library/cpp/actors/util/datetime.h>
+
+namespace NActors {
+
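+ // Scoped profiler for the interconnect: TFunction logs an ENTRY record on
+ // construction and an EXIT record on destruction, so nesting TFunction objects
+ // yields a call tree that Format() prints with per-marker timings.
+ //
+ // Typical usage (sketch):
+ // TProfiled profiled;
+ // profiled.Start();
+ // { TProfiled::TFunction fn(profiled, __FUNCTION__, __LINE__); /* work */ }
+ // Cerr << profiled.Format();
+ // profiled.Finish();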
+ class TProfiled {
+ enum class EType : ui32 {
+ ENTRY,
+ EXIT,
+ };
+
+ struct TItem {
+ EType Type; // entry kind
+ int Line;
+ const char *Marker; // name of the profiled function/part
+ ui64 Timestamp; // cycles
+ };
+
+ bool Enable = false;
+ mutable TDeque<TItem> Items;
+
+ friend class TFunction;
+
+ public:
+ class TFunction {
+ const TProfiled& Profiled;
+
+ public:
+ TFunction(const TProfiled& profiled, const char *name, int line)
+ : Profiled(profiled)
+ {
+ Log(EType::ENTRY, name, line);
+ }
+
+ ~TFunction() {
+ Log(EType::EXIT, nullptr, 0);
+ }
+
+ private:
+ void Log(EType type, const char *marker, int line) {
+ if (Profiled.Enable) {
+ Profiled.Items.push_back(TItem{
+ type,
+ line,
+ marker,
+ GetCycleCountFast()
+ });
+ }
+ }
+ };
+
+ public:
+ void Start() {
+ Enable = true;
+ }
+
+ void Finish() {
+ Items.clear();
+ Enable = false;
+ }
+
+ TDuration Duration() const {
+ return CyclesToDuration(Items ? Items.back().Timestamp - Items.front().Timestamp : 0);
+ }
+
+ TString Format() const {
+ TDeque<TItem>::iterator it = Items.begin();
+ TString res = FormatLevel(it);
+ Y_VERIFY(it == Items.end());
+ return res;
+ }
+
+ private:
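+ // Recursively folds one nesting level of ENTRY/EXIT records into a string;
+ // durations are converted from cycles to nanoseconds via
+ // cycles * 1e6 / GetCyclesPerMillisecond(). Levels with more than ten records
+ // are aggregated per marker ("unordered") instead of being listed in order.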
+ TString FormatLevel(TDeque<TItem>::iterator& it) const {
+ struct TRecord {
+ TString Marker;
+ ui64 Duration;
+ TString Interior;
+
+ bool operator <(const TRecord& other) const {
+ return Duration < other.Duration;
+ }
+ };
+ TVector<TRecord> records;
+
+ while (it != Items.end() && it->Type != EType::EXIT) {
+ Y_VERIFY(it->Type == EType::ENTRY);
+ const TString marker = Sprintf("%s:%d", it->Marker, it->Line);
+ const ui64 begin = it->Timestamp;
+ ++it;
+ const TString interior = FormatLevel(it);
+ Y_VERIFY(it != Items.end());
+ Y_VERIFY(it->Type == EType::EXIT);
+ const ui64 end = it->Timestamp;
+ records.push_back(TRecord{marker, end - begin, interior});
+ ++it;
+ }
+
+ TStringStream s;
+ const ui64 cyclesPerMs = GetCyclesPerMillisecond();
+
+ if (records.size() <= 10) {
+ bool first = true;
+ for (const TRecord& record : records) {
+ if (first) {
+ first = false;
+ } else {
+ s << " ";
+ }
+ s << record.Marker << "(" << (record.Duration * 1000000 / cyclesPerMs) << "ns)";
+ if (record.Interior) {
+ s << " {" << record.Interior << "}";
+ }
+ }
+ } else {
+ TMap<TString, TVector<TRecord>> m;
+ for (TRecord& r : records) {
+ const TString key = r.Marker;
+ m[key].push_back(std::move(r));
+ }
+
+ s << "unordered ";
+ for (auto& [key, value] : m) {
+ auto i = std::max_element(value.begin(), value.end());
+ ui64 sum = 0;
+ for (const auto& item : value) {
+ sum += item.Duration;
+ }
+ sum = sum * 1000000 / cyclesPerMs;
+ s << key << " num# " << value.size() << " sum# " << sum << "ns max# " << (i->Duration * 1000000 / cyclesPerMs) << "ns";
+ if (i->Interior) {
+ s << " {" << i->Interior << "}";
+ }
+ }
+ }
+
+ return s.Str();
+ }
+ };
+
+} // NActors
diff --git a/library/cpp/actors/interconnect/slowpoke_actor.h b/library/cpp/actors/interconnect/slowpoke_actor.h
new file mode 100644
index 0000000000..4b02e5da48
--- /dev/null
+++ b/library/cpp/actors/interconnect/slowpoke_actor.h
@@ -0,0 +1,47 @@
+#pragma once
+
+#include <library/cpp/actors/core/actor_bootstrapped.h>
+
+namespace NActors {
+
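+ // Actor that deliberately hogs its executor thread: on every wakeup it blocks
+ // the thread for a random SleepMin..SleepMax interval, reschedules itself after
+ // a random RescheduleMin..RescheduleMax delay, and poisons itself once Duration
+ // has elapsed.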
+ class TSlowpokeActor : public TActorBootstrapped<TSlowpokeActor> {
+ const TDuration Duration;
+ const TDuration SleepMin;
+ const TDuration SleepMax;
+ const TDuration RescheduleMin;
+ const TDuration RescheduleMax;
+
+ public:
+ static constexpr NKikimrServices::TActivity::EType ActorActivityType() {
+ return NKikimrServices::TActivity::INTERCONNECT_COMMON;
+ }
+
+ TSlowpokeActor(TDuration duration, TDuration sleepMin, TDuration sleepMax, TDuration rescheduleMin, TDuration rescheduleMax)
+ : Duration(duration)
+ , SleepMin(sleepMin)
+ , SleepMax(sleepMax)
+ , RescheduleMin(rescheduleMin)
+ , RescheduleMax(rescheduleMax)
+ {}
+
+ void Bootstrap(const TActorContext& ctx) {
+ Become(&TThis::StateFunc, ctx, Duration, new TEvents::TEvPoisonPill);
+ HandleWakeup(ctx);
+ }
+
+ void HandleWakeup(const TActorContext& ctx) {
+ Sleep(RandomDuration(SleepMin, SleepMax));
+ ctx.Schedule(RandomDuration(RescheduleMin, RescheduleMax), new TEvents::TEvWakeup);
+ }
+
+ static TDuration RandomDuration(TDuration min, TDuration max) {
+ return min + TDuration::FromValue(RandomNumber<ui64>(max.GetValue() - min.GetValue() + 1));
+ }
+
+ STRICT_STFUNC(StateFunc,
+ CFunc(TEvents::TSystem::PoisonPill, Die)
+ CFunc(TEvents::TSystem::Wakeup, HandleWakeup)
+ )
+ };
+
+} // NActors
diff --git a/library/cpp/actors/interconnect/types.cpp b/library/cpp/actors/interconnect/types.cpp
new file mode 100644
index 0000000000..979c55f277
--- /dev/null
+++ b/library/cpp/actors/interconnect/types.cpp
@@ -0,0 +1,564 @@
+#include "types.h"
+#include <util/string/printf.h>
+#include <util/generic/vector.h>
+#include <errno.h>
+
+namespace NActors {
+
+ TVector<const char*> TDisconnectReason::Reasons = {
+ "EndOfStream",
+ "CloseOnIdle",
+ "LostConnection",
+ "DeadPeer",
+ "NewSession",
+ "HandshakeFailTransient",
+ "HandshakeFailPermanent",
+ "UserRequest",
+ "Debug",
+ "ChecksumError",
+ "FormatError",
+ "EventTooLarge",
+ "QueueOverload",
+ "E2BIG",
+ "EACCES",
+ "EADDRINUSE",
+ "EADDRNOTAVAIL",
+ "EADV",
+ "EAFNOSUPPORT",
+ "EAGAIN",
+ "EALREADY",
+ "EBADE",
+ "EBADF",
+ "EBADFD",
+ "EBADMSG",
+ "EBADR",
+ "EBADRQC",
+ "EBADSLT",
+ "EBFONT",
+ "EBUSY",
+ "ECANCELED",
+ "ECHILD",
+ "ECHRNG",
+ "ECOMM",
+ "ECONNABORTED",
+ "ECONNREFUSED",
+ "ECONNRESET",
+ "EDEADLK",
+ "EDEADLOCK",
+ "EDESTADDRREQ",
+ "EDOM",
+ "EDOTDOT",
+ "EDQUOT",
+ "EEXIST",
+ "EFAULT",
+ "EFBIG",
+ "EHOSTDOWN",
+ "EHOSTUNREACH",
+ "EHWPOISON",
+ "EIDRM",
+ "EILSEQ",
+ "EINPROGRESS",
+ "EINTR",
+ "EINVAL",
+ "EIO",
+ "EISCONN",
+ "EISDIR",
+ "EISNAM",
+ "EKEYEXPIRED",
+ "EKEYREJECTED",
+ "EKEYREVOKED",
+ "EL2HLT",
+ "EL2NSYNC",
+ "EL3HLT",
+ "EL3RST",
+ "ELIBACC",
+ "ELIBBAD",
+ "ELIBEXEC",
+ "ELIBMAX",
+ "ELIBSCN",
+ "ELNRNG",
+ "ELOOP",
+ "EMEDIUMTYPE",
+ "EMFILE",
+ "EMLINK",
+ "EMSGSIZE",
+ "EMULTIHOP",
+ "ENAMETOOLONG",
+ "ENAVAIL",
+ "ENETDOWN",
+ "ENETRESET",
+ "ENETUNREACH",
+ "ENFILE",
+ "ENOANO",
+ "ENOBUFS",
+ "ENOCSI",
+ "ENODATA",
+ "ENODEV",
+ "ENOENT",
+ "ENOEXEC",
+ "ENOKEY",
+ "ENOLCK",
+ "ENOLINK",
+ "ENOMEDIUM",
+ "ENOMEM",
+ "ENOMSG",
+ "ENONET",
+ "ENOPKG",
+ "ENOPROTOOPT",
+ "ENOSPC",
+ "ENOSR",
+ "ENOSTR",
+ "ENOSYS",
+ "ENOTBLK",
+ "ENOTCONN",
+ "ENOTDIR",
+ "ENOTEMPTY",
+ "ENOTNAM",
+ "ENOTRECOVERABLE",
+ "ENOTSOCK",
+ "ENOTTY",
+ "ENOTUNIQ",
+ "ENXIO",
+ "EOPNOTSUPP",
+ "EOVERFLOW",
+ "EOWNERDEAD",
+ "EPERM",
+ "EPFNOSUPPORT",
+ "EPIPE",
+ "EPROTO",
+ "EPROTONOSUPPORT",
+ "EPROTOTYPE",
+ "ERANGE",
+ "EREMCHG",
+ "EREMOTE",
+ "EREMOTEIO",
+ "ERESTART",
+ "ERFKILL",
+ "EROFS",
+ "ESHUTDOWN",
+ "ESOCKTNOSUPPORT",
+ "ESPIPE",
+ "ESRCH",
+ "ESRMNT",
+ "ESTALE",
+ "ESTRPIPE",
+ "ETIME",
+ "ETIMEDOUT",
+ "ETOOMANYREFS",
+ "ETXTBSY",
+ "EUCLEAN",
+ "EUNATCH",
+ "EUSERS",
+ "EWOULDBLOCK",
+ "EXDEV",
+ "EXFULL",
+ };
+
+ TDisconnectReason TDisconnectReason::FromErrno(int err) {
+ switch (err) {
+#define REASON(ERRNO) case ERRNO: return TDisconnectReason(TString(#ERRNO))
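+ // Every errno constant is wrapped in #if defined(...) since the available set
+ // differs between platforms; aliased codes (EDEADLOCK/EDEADLK,
+ // EWOULDBLOCK/EAGAIN) are emitted only when they differ, keeping the case
+ // labels unique.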
+#if defined(E2BIG)
+ REASON(E2BIG);
+#endif
+#if defined(EACCES)
+ REASON(EACCES);
+#endif
+#if defined(EADDRINUSE)
+ REASON(EADDRINUSE);
+#endif
+#if defined(EADDRNOTAVAIL)
+ REASON(EADDRNOTAVAIL);
+#endif
+#if defined(EADV)
+ REASON(EADV);
+#endif
+#if defined(EAFNOSUPPORT)
+ REASON(EAFNOSUPPORT);
+#endif
+#if defined(EAGAIN)
+ REASON(EAGAIN);
+#endif
+#if defined(EALREADY)
+ REASON(EALREADY);
+#endif
+#if defined(EBADE)
+ REASON(EBADE);
+#endif
+#if defined(EBADF)
+ REASON(EBADF);
+#endif
+#if defined(EBADFD)
+ REASON(EBADFD);
+#endif
+#if defined(EBADMSG)
+ REASON(EBADMSG);
+#endif
+#if defined(EBADR)
+ REASON(EBADR);
+#endif
+#if defined(EBADRQC)
+ REASON(EBADRQC);
+#endif
+#if defined(EBADSLT)
+ REASON(EBADSLT);
+#endif
+#if defined(EBFONT)
+ REASON(EBFONT);
+#endif
+#if defined(EBUSY)
+ REASON(EBUSY);
+#endif
+#if defined(ECANCELED)
+ REASON(ECANCELED);
+#endif
+#if defined(ECHILD)
+ REASON(ECHILD);
+#endif
+#if defined(ECHRNG)
+ REASON(ECHRNG);
+#endif
+#if defined(ECOMM)
+ REASON(ECOMM);
+#endif
+#if defined(ECONNABORTED)
+ REASON(ECONNABORTED);
+#endif
+#if defined(ECONNREFUSED)
+ REASON(ECONNREFUSED);
+#endif
+#if defined(ECONNRESET)
+ REASON(ECONNRESET);
+#endif
+#if defined(EDEADLK)
+ REASON(EDEADLK);
+#endif
+#if defined(EDEADLOCK) && (!defined(EDEADLK) || EDEADLOCK != EDEADLK)
+ REASON(EDEADLOCK);
+#endif
+#if defined(EDESTADDRREQ)
+ REASON(EDESTADDRREQ);
+#endif
+#if defined(EDOM)
+ REASON(EDOM);
+#endif
+#if defined(EDOTDOT)
+ REASON(EDOTDOT);
+#endif
+#if defined(EDQUOT)
+ REASON(EDQUOT);
+#endif
+#if defined(EEXIST)
+ REASON(EEXIST);
+#endif
+#if defined(EFAULT)
+ REASON(EFAULT);
+#endif
+#if defined(EFBIG)
+ REASON(EFBIG);
+#endif
+#if defined(EHOSTDOWN)
+ REASON(EHOSTDOWN);
+#endif
+#if defined(EHOSTUNREACH)
+ REASON(EHOSTUNREACH);
+#endif
+#if defined(EHWPOISON)
+ REASON(EHWPOISON);
+#endif
+#if defined(EIDRM)
+ REASON(EIDRM);
+#endif
+#if defined(EILSEQ)
+ REASON(EILSEQ);
+#endif
+#if defined(EINPROGRESS)
+ REASON(EINPROGRESS);
+#endif
+#if defined(EINTR)
+ REASON(EINTR);
+#endif
+#if defined(EINVAL)
+ REASON(EINVAL);
+#endif
+#if defined(EIO)
+ REASON(EIO);
+#endif
+#if defined(EISCONN)
+ REASON(EISCONN);
+#endif
+#if defined(EISDIR)
+ REASON(EISDIR);
+#endif
+#if defined(EISNAM)
+ REASON(EISNAM);
+#endif
+#if defined(EKEYEXPIRED)
+ REASON(EKEYEXPIRED);
+#endif
+#if defined(EKEYREJECTED)
+ REASON(EKEYREJECTED);
+#endif
+#if defined(EKEYREVOKED)
+ REASON(EKEYREVOKED);
+#endif
+#if defined(EL2HLT)
+ REASON(EL2HLT);
+#endif
+#if defined(EL2NSYNC)
+ REASON(EL2NSYNC);
+#endif
+#if defined(EL3HLT)
+ REASON(EL3HLT);
+#endif
+#if defined(EL3RST)
+ REASON(EL3RST);
+#endif
+#if defined(ELIBACC)
+ REASON(ELIBACC);
+#endif
+#if defined(ELIBBAD)
+ REASON(ELIBBAD);
+#endif
+#if defined(ELIBEXEC)
+ REASON(ELIBEXEC);
+#endif
+#if defined(ELIBMAX)
+ REASON(ELIBMAX);
+#endif
+#if defined(ELIBSCN)
+ REASON(ELIBSCN);
+#endif
+#if defined(ELNRNG)
+ REASON(ELNRNG);
+#endif
+#if defined(ELOOP)
+ REASON(ELOOP);
+#endif
+#if defined(EMEDIUMTYPE)
+ REASON(EMEDIUMTYPE);
+#endif
+#if defined(EMFILE)
+ REASON(EMFILE);
+#endif
+#if defined(EMLINK)
+ REASON(EMLINK);
+#endif
+#if defined(EMSGSIZE)
+ REASON(EMSGSIZE);
+#endif
+#if defined(EMULTIHOP)
+ REASON(EMULTIHOP);
+#endif
+#if defined(ENAMETOOLONG)
+ REASON(ENAMETOOLONG);
+#endif
+#if defined(ENAVAIL)
+ REASON(ENAVAIL);
+#endif
+#if defined(ENETDOWN)
+ REASON(ENETDOWN);
+#endif
+#if defined(ENETRESET)
+ REASON(ENETRESET);
+#endif
+#if defined(ENETUNREACH)
+ REASON(ENETUNREACH);
+#endif
+#if defined(ENFILE)
+ REASON(ENFILE);
+#endif
+#if defined(ENOANO)
+ REASON(ENOANO);
+#endif
+#if defined(ENOBUFS)
+ REASON(ENOBUFS);
+#endif
+#if defined(ENOCSI)
+ REASON(ENOCSI);
+#endif
+#if defined(ENODATA)
+ REASON(ENODATA);
+#endif
+#if defined(ENODEV)
+ REASON(ENODEV);
+#endif
+#if defined(ENOENT)
+ REASON(ENOENT);
+#endif
+#if defined(ENOEXEC)
+ REASON(ENOEXEC);
+#endif
+#if defined(ENOKEY)
+ REASON(ENOKEY);
+#endif
+#if defined(ENOLCK)
+ REASON(ENOLCK);
+#endif
+#if defined(ENOLINK)
+ REASON(ENOLINK);
+#endif
+#if defined(ENOMEDIUM)
+ REASON(ENOMEDIUM);
+#endif
+#if defined(ENOMEM)
+ REASON(ENOMEM);
+#endif
+#if defined(ENOMSG)
+ REASON(ENOMSG);
+#endif
+#if defined(ENONET)
+ REASON(ENONET);
+#endif
+#if defined(ENOPKG)
+ REASON(ENOPKG);
+#endif
+#if defined(ENOPROTOOPT)
+ REASON(ENOPROTOOPT);
+#endif
+#if defined(ENOSPC)
+ REASON(ENOSPC);
+#endif
+#if defined(ENOSR)
+ REASON(ENOSR);
+#endif
+#if defined(ENOSTR)
+ REASON(ENOSTR);
+#endif
+#if defined(ENOSYS)
+ REASON(ENOSYS);
+#endif
+#if defined(ENOTBLK)
+ REASON(ENOTBLK);
+#endif
+#if defined(ENOTCONN)
+ REASON(ENOTCONN);
+#endif
+#if defined(ENOTDIR)
+ REASON(ENOTDIR);
+#endif
+#if defined(ENOTEMPTY)
+ REASON(ENOTEMPTY);
+#endif
+#if defined(ENOTNAM)
+ REASON(ENOTNAM);
+#endif
+#if defined(ENOTRECOVERABLE)
+ REASON(ENOTRECOVERABLE);
+#endif
+#if defined(ENOTSOCK)
+ REASON(ENOTSOCK);
+#endif
+#if defined(ENOTTY)
+ REASON(ENOTTY);
+#endif
+#if defined(ENOTUNIQ)
+ REASON(ENOTUNIQ);
+#endif
+#if defined(ENXIO)
+ REASON(ENXIO);
+#endif
+#if defined(EOPNOTSUPP)
+ REASON(EOPNOTSUPP);
+#endif
+#if defined(EOVERFLOW)
+ REASON(EOVERFLOW);
+#endif
+#if defined(EOWNERDEAD)
+ REASON(EOWNERDEAD);
+#endif
+#if defined(EPERM)
+ REASON(EPERM);
+#endif
+#if defined(EPFNOSUPPORT)
+ REASON(EPFNOSUPPORT);
+#endif
+#if defined(EPIPE)
+ REASON(EPIPE);
+#endif
+#if defined(EPROTO)
+ REASON(EPROTO);
+#endif
+#if defined(EPROTONOSUPPORT)
+ REASON(EPROTONOSUPPORT);
+#endif
+#if defined(EPROTOTYPE)
+ REASON(EPROTOTYPE);
+#endif
+#if defined(ERANGE)
+ REASON(ERANGE);
+#endif
+#if defined(EREMCHG)
+ REASON(EREMCHG);
+#endif
+#if defined(EREMOTE)
+ REASON(EREMOTE);
+#endif
+#if defined(EREMOTEIO)
+ REASON(EREMOTEIO);
+#endif
+#if defined(ERESTART)
+ REASON(ERESTART);
+#endif
+#if defined(ERFKILL)
+ REASON(ERFKILL);
+#endif
+#if defined(EROFS)
+ REASON(EROFS);
+#endif
+#if defined(ESHUTDOWN)
+ REASON(ESHUTDOWN);
+#endif
+#if defined(ESOCKTNOSUPPORT)
+ REASON(ESOCKTNOSUPPORT);
+#endif
+#if defined(ESPIPE)
+ REASON(ESPIPE);
+#endif
+#if defined(ESRCH)
+ REASON(ESRCH);
+#endif
+#if defined(ESRMNT)
+ REASON(ESRMNT);
+#endif
+#if defined(ESTALE)
+ REASON(ESTALE);
+#endif
+#if defined(ESTRPIPE)
+ REASON(ESTRPIPE);
+#endif
+#if defined(ETIME)
+ REASON(ETIME);
+#endif
+#if defined(ETIMEDOUT)
+ REASON(ETIMEDOUT);
+#endif
+#if defined(ETOOMANYREFS)
+ REASON(ETOOMANYREFS);
+#endif
+#if defined(ETXTBSY)
+ REASON(ETXTBSY);
+#endif
+#if defined(EUCLEAN)
+ REASON(EUCLEAN);
+#endif
+#if defined(EUNATCH)
+ REASON(EUNATCH);
+#endif
+#if defined(EUSERS)
+ REASON(EUSERS);
+#endif
+#if defined(EWOULDBLOCK) && (!defined(EAGAIN) || EWOULDBLOCK != EAGAIN)
+ REASON(EWOULDBLOCK);
+#endif
+#if defined(EXDEV)
+ REASON(EXDEV);
+#endif
+#if defined(EXFULL)
+ REASON(EXFULL);
+#endif
+ default:
+ return TDisconnectReason(Sprintf("errno=%d", err));
+ }
+ }
+
+} // NActors
diff --git a/library/cpp/actors/interconnect/types.h b/library/cpp/actors/interconnect/types.h
new file mode 100644
index 0000000000..2662c50c22
--- /dev/null
+++ b/library/cpp/actors/interconnect/types.h
@@ -0,0 +1,43 @@
+#pragma once
+
+#include <util/generic/string.h>
+
+namespace NActors {
+
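+ // Disconnect reason wrapped into a string; instances come either from the
+ // named factory methods below or from FromErrno(). Reasons enumerates all
+ // statically known reason names (the factory reasons followed by errno
+ // mnemonics).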
+ class TDisconnectReason {
+ TString Text;
+
+ private:
+ explicit TDisconnectReason(TString text)
+ : Text(std::move(text))
+ {}
+
+ public:
+ TDisconnectReason() = default;
+ TDisconnectReason(const TDisconnectReason&) = default;
+ TDisconnectReason(TDisconnectReason&&) = default;
+
+ static TDisconnectReason FromErrno(int err);
+
+ static TDisconnectReason EndOfStream() { return TDisconnectReason("EndOfStream"); }
+ static TDisconnectReason CloseOnIdle() { return TDisconnectReason("CloseOnIdle"); }
+ static TDisconnectReason LostConnection() { return TDisconnectReason("LostConnection"); }
+ static TDisconnectReason DeadPeer() { return TDisconnectReason("DeadPeer"); }
+ static TDisconnectReason NewSession() { return TDisconnectReason("NewSession"); }
+ static TDisconnectReason HandshakeFailTransient() { return TDisconnectReason("HandshakeFailTransient"); }
+ static TDisconnectReason HandshakeFailPermanent() { return TDisconnectReason("HandshakeFailPermanent"); }
+ static TDisconnectReason UserRequest() { return TDisconnectReason("UserRequest"); }
+ static TDisconnectReason Debug() { return TDisconnectReason("Debug"); }
+ static TDisconnectReason ChecksumError() { return TDisconnectReason("ChecksumError"); }
+ static TDisconnectReason FormatError() { return TDisconnectReason("FormatError"); }
+ static TDisconnectReason EventTooLarge() { return TDisconnectReason("EventTooLarge"); }
+ static TDisconnectReason QueueOverload() { return TDisconnectReason("QueueOverload"); }
+
+ TString ToString() const {
+ return Text;
+ }
+
+ static TVector<const char*> Reasons;
+ };
+
+} // NActors
diff --git a/library/cpp/actors/interconnect/ut/channel_scheduler_ut.cpp b/library/cpp/actors/interconnect/ut/channel_scheduler_ut.cpp
new file mode 100644
index 0000000000..565a511859
--- /dev/null
+++ b/library/cpp/actors/interconnect/ut/channel_scheduler_ut.cpp
@@ -0,0 +1,115 @@
+#include <library/cpp/actors/interconnect/channel_scheduler.h>
+#include <library/cpp/actors/interconnect/events_local.h>
+#include <library/cpp/testing/unittest/registar.h>
+
+using namespace NActors;
+
+Y_UNIT_TEST_SUITE(ChannelScheduler) {
+
+ Y_UNIT_TEST(PriorityTraffic) {
+ auto common = MakeIntrusive<TInterconnectProxyCommon>();
+ common->MonCounters = MakeIntrusive<NMonitoring::TDynamicCounters>();
+ std::shared_ptr<IInterconnectMetrics> ctr = CreateInterconnectCounters(common);
+ ctr->SetPeerInfo("peer", "1");
+ auto callback = [](THolder<IEventBase>) {};
+ TEventHolderPool pool(common, callback);
+ TSessionParams p;
+ TChannelScheduler scheduler(1, {}, ctr, pool, 64 << 20, p);
+
+ ui32 numEvents = 0;
+
+ auto pushEvent = [&](size_t size, int channel) {
+ TString payload(size, 'X');
+ auto ev = MakeHolder<IEventHandle>(1, 0, TActorId(), TActorId(), MakeIntrusive<TEventSerializedData>(payload, false), 0);
+ auto& ch = scheduler.GetOutputChannel(channel);
+ const bool wasWorking = ch.IsWorking();
+ ch.Push(*ev);
+ if (!wasWorking) {
+ scheduler.AddToHeap(ch, 0);
+ }
+ ++numEvents;
+ };
+
+ for (ui32 i = 0; i < 100; ++i) {
+ pushEvent(10000, 1);
+ }
+
+ for (ui32 i = 0; i < 1000; ++i) {
+ pushEvent(1000, 2);
+ }
+
+ std::map<ui16, ui32> run;
+ ui32 step = 0;
+
+ std::deque<std::map<ui16, ui32>> window;
+
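+ // Each step packs one outgoing TCP packet, always pulling from the channel
+ // with the least consumed weight. Fairness is checked over a sliding window of
+ // 32 steps: the per-channel byte counts must keep stddev/mean below 1.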
+ for (; numEvents; ++step) {
+ TTcpPacketOutTask task(p);
+
+ if (step == 100) {
+ for (ui32 i = 0; i < 200; ++i) {
+ pushEvent(1000, 3);
+ }
+ }
+
+ std::map<ui16, ui32> ch;
+
+ while (numEvents) {
+ TEventOutputChannel *channel = scheduler.PickChannelWithLeastConsumedWeight();
+ ui32 before = task.GetDataSize();
+ ui64 weightConsumed = 0;
+ numEvents -= channel->FeedBuf(task, 0, &weightConsumed);
+ ui32 after = task.GetDataSize();
+ Y_VERIFY(after >= before);
+ scheduler.FinishPick(weightConsumed, 0);
+ const ui32 bytesAdded = after - before;
+ if (!bytesAdded) {
+ break;
+ }
+ ch[channel->ChannelId] += bytesAdded;
+ }
+
+ scheduler.Equalize();
+
+ for (const auto& [key, value] : ch) {
+ run[key] += value;
+ }
+ window.push_back(ch);
+
+ if (window.size() == 32) {
+ for (const auto& [key, value] : window.front()) {
+ run[key] -= value;
+ if (!run[key]) {
+ run.erase(key);
+ }
+ }
+ window.pop_front();
+ }
+
+ double mean = 0.0;
+ for (const auto& [key, value] : run) {
+ mean += value;
+ }
+ mean /= run.size();
+
+ double dev = 0.0;
+ for (const auto& [key, value] : run) {
+ dev += (value - mean) * (value - mean);
+ }
+ dev = sqrt(dev / run.size());
+
+ double devToMean = dev / mean;
+
+ Cerr << step << ": ";
+ for (const auto& [key, value] : run) {
+ Cerr << "ch" << key << "=" << value << " ";
+ }
+ Cerr << "mean# " << mean << " dev# " << dev << " part# " << devToMean;
+
+ Cerr << Endl;
+
+ UNIT_ASSERT(devToMean < 1);
+ }
+ }
+
+}
diff --git a/library/cpp/actors/interconnect/ut/dynamic_proxy_ut.cpp b/library/cpp/actors/interconnect/ut/dynamic_proxy_ut.cpp
new file mode 100644
index 0000000000..3c474979dc
--- /dev/null
+++ b/library/cpp/actors/interconnect/ut/dynamic_proxy_ut.cpp
@@ -0,0 +1,179 @@
+#include <library/cpp/actors/interconnect/ut/lib/node.h>
+#include <library/cpp/actors/interconnect/ut/lib/ic_test_cluster.h>
+#include <library/cpp/testing/unittest/registar.h>
+
+TActorId MakeResponderServiceId(ui32 nodeId) {
+ return TActorId(nodeId, TStringBuf("ResponderAct", 12));
+}
+
+class TArriveQueue {
+ struct TArrivedItem {
+ ui32 QueueId;
+ ui32 Index;
+ bool Success;
+ };
+
+ TMutex Lock;
+ std::size_t Counter = 0;
+ std::vector<TArrivedItem> Items;
+
+public:
+ TArriveQueue(size_t capacity)
+ : Items(capacity)
+ {}
+
+ bool Done() const {
+ with_lock (Lock) {
+ return Counter == Items.size();
+ }
+ }
+
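+ // Cookie layout: high 32 bits hold the sender queue id, low 32 bits the
+ // per-queue sequence index; Check() verifies per-queue ordering afterwards.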
+ void Push(ui64 cookie, bool success) {
+ with_lock (Lock) {
+ const size_t pos = Counter++;
+ TArrivedItem item{.QueueId = static_cast<ui32>(cookie >> 32), .Index = static_cast<ui32>(cookie & 0xffff'ffff),
+ .Success = success};
+ memcpy(&Items[pos], &item, sizeof(TArrivedItem));
+ }
+ }
+
+ void Check() {
+ struct TPerQueueState {
+ std::vector<ui32> Ok, Error;
+ };
+ std::unordered_map<ui32, TPerQueueState> state;
+ for (const TArrivedItem& item : Items) {
+ auto& st = state[item.QueueId];
+ auto& v = item.Success ? st.Ok : st.Error;
+ v.push_back(item.Index);
+ }
+ for (const auto& [queueId, st] : state) {
+ ui32 expected = 0;
+ for (const ui32 index : st.Ok) {
+ Y_VERIFY(index == expected);
+ ++expected;
+ }
+ for (const ui32 index : st.Error) {
+ Y_VERIFY(index == expected);
+ ++expected;
+ }
+ if (st.Error.size()) {
+ Cerr << "Error.size# " << st.Error.size() << Endl;
+ }
+ }
+ }
+};
+
+class TResponder : public TActor<TResponder> {
+ TArriveQueue& ArriveQueue;
+
+public:
+ TResponder(TArriveQueue& arriveQueue)
+ : TActor(&TResponder::StateFunc)
+ , ArriveQueue(arriveQueue)
+ {}
+
+ STRICT_STFUNC(StateFunc,
+ hFunc(TEvents::TEvPing, Handle);
+ )
+
+ void Handle(TEvents::TEvPing::TPtr ev) {
+ ArriveQueue.Push(ev->Cookie, true);
+ }
+};
+
+class TSender : public TActor<TSender> {
+ TArriveQueue& ArriveQueue;
+
+public:
+ TSender(TArriveQueue& arriveQueue)
+ : TActor(&TThis::StateFunc)
+ , ArriveQueue(arriveQueue)
+ {}
+
+ STRICT_STFUNC(StateFunc,
+ hFunc(TEvents::TEvUndelivered, Handle);
+ )
+
+ void Handle(TEvents::TEvUndelivered::TPtr ev) {
+ ArriveQueue.Push(ev->Cookie, false);
+ }
+};
+
+void SenderThread(TMutex& lock, TActorSystem *as, ui32 nodeId, ui32 queueId, ui32 count, TArriveQueue& arriveQueue) {
+ const TActorId sender = as->Register(new TSender(arriveQueue));
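+ // empty critical section: the caller holds the lock while it spawns and
+ // starts all sender threads, so this acts as a start barrier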
+ with_lock(lock) {}
+ const TActorId target = MakeResponderServiceId(nodeId);
+ for (ui32 i = 0; i < count; ++i) {
+ const ui32 flags = IEventHandle::FlagTrackDelivery;
+ as->Send(new IEventHandle(TEvents::THelloWorld::Ping, flags, target, sender, nullptr, ((ui64)queueId << 32) | i));
+ }
+}
+
+void RaceTestIter(ui32 numThreads, ui32 count) {
+ TPortManager portman;
+ THashMap<ui32, ui16> nodeToPort;
+ const ui32 numNodes = 6; // total
+ const ui32 numDynamicNodes = 3;
+ for (ui32 i = 1; i <= numNodes; ++i) {
+ nodeToPort.emplace(i, portman.GetPort());
+ }
+
+ NMonitoring::TDynamicCounterPtr counters = new NMonitoring::TDynamicCounters;
+ std::list<TNode> nodes;
+ for (ui32 i = 1; i <= numNodes; ++i) {
+ nodes.emplace_back(i, numNodes, nodeToPort, "127.1.0.0", counters->GetSubgroup("nodeId", TStringBuilder() << i),
+ TDuration::Seconds(10), TChannelsConfig(), numDynamicNodes, numThreads);
+ }
+
+ const ui32 numSenders = 10;
+ TArriveQueue arriveQueue(numSenders * numNodes * (numNodes - 1) * count);
+ for (TNode& node : nodes) {
+ node.RegisterServiceActor(MakeResponderServiceId(node.GetActorSystem()->NodeId), new TResponder(arriveQueue));
+ }
+
+ TMutex lock;
+ std::list<TThread> threads;
+ ui32 queueId = 0;
+ with_lock(lock) {
+ for (TNode& from : nodes) {
+ for (ui32 toId = 1; toId <= numNodes; ++toId) {
+ if (toId == from.GetActorSystem()->NodeId) {
+ continue;
+ }
+ for (ui32 i = 0; i < numSenders; ++i) {
+ threads.emplace_back([=, &lock, &from, &arriveQueue] {
+ SenderThread(lock, from.GetActorSystem(), toId, queueId, count, arriveQueue);
+ });
+ ++queueId;
+ }
+ }
+ }
+ for (auto& thread : threads) {
+ thread.Start();
+ }
+ }
+ for (auto& thread : threads) {
+ thread.Join();
+ }
+
+ for (THPTimer timer; !arriveQueue.Done(); Sleep(TDuration::MilliSeconds(10))) {
+ Y_VERIFY(timer.Passed() < 10);
+ }
+
+ nodes.clear();
+ arriveQueue.Check();
+}
+
+Y_UNIT_TEST_SUITE(DynamicProxy) {
+ Y_UNIT_TEST(RaceCheck1) {
+ for (ui32 iteration = 0; iteration < 100; ++iteration) {
+ RaceTestIter(1 + iteration % 5, 1);
+ }
+ }
+ Y_UNIT_TEST(RaceCheck10) {
+ for (ui32 iteration = 0; iteration < 100; ++iteration) {
+ RaceTestIter(1 + iteration % 5, 10);
+ }
+ }
+}
diff --git a/library/cpp/actors/interconnect/ut/event_holder_pool_ut.cpp b/library/cpp/actors/interconnect/ut/event_holder_pool_ut.cpp
new file mode 100644
index 0000000000..e6b2bd4e4c
--- /dev/null
+++ b/library/cpp/actors/interconnect/ut/event_holder_pool_ut.cpp
@@ -0,0 +1,59 @@
+#include <library/cpp/testing/unittest/registar.h>
+#include <library/cpp/actors/core/events.h>
+#include <library/cpp/actors/core/event_local.h>
+#include <library/cpp/actors/interconnect/interconnect_common.h>
+#include <library/cpp/monlib/dynamic_counters/counters.h>
+#include <library/cpp/actors/interconnect/event_holder_pool.h>
+
+#include <atomic>
+
+using namespace NActors;
+
+template<typename T>
+TEventHolderPool Setup(T&& callback) {
+ auto common = MakeIntrusive<TInterconnectProxyCommon>();
+ common->DestructorQueueSize = std::make_shared<std::atomic<TAtomicBase>>();
+ common->MaxDestructorQueueSize = 1024 * 1024;
+ return TEventHolderPool(common, callback);
+}
+
+Y_UNIT_TEST_SUITE(EventHolderPool) {
+
+ Y_UNIT_TEST(Overflow) {
+ TDeque<THolder<IEventBase>> freeQ;
+ auto callback = [&](THolder<IEventBase> event) {
+ freeQ.push_back(std::move(event));
+ };
+ auto pool = Setup(std::move(callback));
+
+ std::list<TEventHolder> q;
+
+ auto& ev1 = pool.Allocate(q);
+ ev1.Buffer = MakeIntrusive<TEventSerializedData>(TString::Uninitialized(512 * 1024), true);
+
+ auto& ev2 = pool.Allocate(q);
+ ev2.Buffer = MakeIntrusive<TEventSerializedData>(TString::Uninitialized(512 * 1024), true);
+
+ auto& ev3 = pool.Allocate(q);
+ ev3.Buffer = MakeIntrusive<TEventSerializedData>(TString::Uninitialized(512 * 1024), true);
+
+ auto& ev4 = pool.Allocate(q);
+ ev4.Buffer = MakeIntrusive<TEventSerializedData>(TString::Uninitialized(512 * 1024), true);
+
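+ // Four 512 KiB buffers were allocated with MaxDestructorQueueSize set to
+ // 1 MiB; releasing events should therefore overflow the destructor queue and
+ // make the pool hand the pending events to the callback as a single batched
+ // IEventBase.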
+ pool.Release(q, q.begin());
+ pool.Release(q, q.begin());
+ pool.Trim();
+ UNIT_ASSERT_VALUES_EQUAL(freeQ.size(), 1);
+
+ pool.Release(q, q.begin());
+ UNIT_ASSERT_VALUES_EQUAL(freeQ.size(), 1);
+
+ freeQ.clear();
+ pool.Release(q, q.begin());
+ pool.Trim();
+ UNIT_ASSERT_VALUES_EQUAL(freeQ.size(), 1);
+
+ freeQ.clear(); // if we don't do this, we may crash due to the order of object destruction
+ }
+
+}
diff --git a/library/cpp/actors/interconnect/ut/interconnect_ut.cpp b/library/cpp/actors/interconnect/ut/interconnect_ut.cpp
new file mode 100644
index 0000000000..8ef0b1507c
--- /dev/null
+++ b/library/cpp/actors/interconnect/ut/interconnect_ut.cpp
@@ -0,0 +1,177 @@
+#include <library/cpp/actors/interconnect/ut/lib/ic_test_cluster.h>
+#include <library/cpp/testing/unittest/registar.h>
+#include <library/cpp/digest/md5/md5.h>
+#include <util/random/fast.h>
+
+using namespace NActors;
+
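+// The sender keeps ~10 random payloads in flight and expects the raw MD5 of
+// each payload back. Per-session cookies are tracked so that, when the session
+// dies or an event bounces as undelivered, the affected entries move to
+// Tentative: a pong for them may still arrive from the old session, but is no
+// longer mandatory.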
+class TSenderActor : public TActorBootstrapped<TSenderActor> {
+ const TActorId Recipient;
+ using TSessionToCookie = std::unordered_multimap<TActorId, ui64, THash<TActorId>>;
+ TSessionToCookie SessionToCookie;
+ std::unordered_map<ui64, std::pair<TSessionToCookie::iterator, TString>> InFlight;
+ std::unordered_map<ui64, TString> Tentative;
+ ui64 NextCookie = 0;
+ TActorId SessionId;
+ bool SubscribeInFlight = false;
+
+public:
+ TSenderActor(TActorId recipient)
+ : Recipient(recipient)
+ {}
+
+ void Bootstrap() {
+ Become(&TThis::StateFunc);
+ Subscribe();
+ }
+
+ void Subscribe() {
+ Cerr << (TStringBuilder() << "Subscribe" << Endl);
+ Y_VERIFY(!SubscribeInFlight);
+ SubscribeInFlight = true;
+ Send(TActivationContext::InterconnectProxy(Recipient.NodeId()), new TEvents::TEvSubscribe);
+ }
+
+ void IssueQueries() {
+ if (!SessionId) {
+ return;
+ }
+ while (InFlight.size() < 10) {
+ size_t len = RandomNumber<size_t>(65536) + 1;
+ TString data = TString::Uninitialized(len);
+ TReallyFastRng32 rng(RandomNumber<ui32>());
+ char *p = data.Detach();
+ for (size_t i = 0; i < len; ++i) {
+ p[i] = rng();
+ }
+ const TSessionToCookie::iterator s2cIt = SessionToCookie.emplace(SessionId, NextCookie);
+ InFlight.emplace(NextCookie, std::make_pair(s2cIt, MD5::CalcRaw(data)));
+ TActivationContext::Send(new IEventHandle(TEvents::THelloWorld::Ping, IEventHandle::FlagTrackDelivery, Recipient,
+ SelfId(), MakeIntrusive<TEventSerializedData>(std::move(data), false), NextCookie));
+// Cerr << (TStringBuilder() << "Send# " << NextCookie << Endl);
+ ++NextCookie;
+ }
+ }
+
+ void HandlePong(TAutoPtr<IEventHandle> ev) {
+// Cerr << (TStringBuilder() << "Receive# " << ev->Cookie << Endl);
+ if (const auto it = InFlight.find(ev->Cookie); it != InFlight.end()) {
+ auto& [s2cIt, hash] = it->second;
+ Y_VERIFY(hash == ev->GetChainBuffer()->GetString());
+ SessionToCookie.erase(s2cIt);
+ InFlight.erase(it);
+ } else if (const auto it = Tentative.find(ev->Cookie); it != Tentative.end()) {
+ Y_VERIFY(it->second == ev->GetChainBuffer()->GetString());
+ Tentative.erase(it);
+ } else {
+ Y_FAIL("Cookie# %" PRIu64, ev->Cookie);
+ }
+ IssueQueries();
+ }
+
+ void Handle(TEvInterconnect::TEvNodeConnected::TPtr ev) {
+ Cerr << (TStringBuilder() << "TEvNodeConnected" << Endl);
+ Y_VERIFY(SubscribeInFlight);
+ SubscribeInFlight = false;
+ Y_VERIFY(!SessionId);
+ SessionId = ev->Sender;
+ IssueQueries();
+ }
+
+ void Handle(TEvInterconnect::TEvNodeDisconnected::TPtr ev) {
+ Cerr << (TStringBuilder() << "TEvNodeDisconnected" << Endl);
+ SubscribeInFlight = false;
+ if (SessionId) {
+ Y_VERIFY(SessionId == ev->Sender);
+ auto r = SessionToCookie.equal_range(SessionId);
+ for (auto it = r.first; it != r.second; ++it) {
+ const auto inFlightIt = InFlight.find(it->second);
+ Y_VERIFY(inFlightIt != InFlight.end());
+ Tentative.emplace(inFlightIt->first, inFlightIt->second.second);
+ InFlight.erase(it->second);
+ }
+ SessionToCookie.erase(r.first, r.second);
+ SessionId = TActorId();
+ }
+ Schedule(TDuration::MilliSeconds(100), new TEvents::TEvWakeup);
+ }
+
+ void Handle(TEvents::TEvUndelivered::TPtr ev) {
+ Cerr << (TStringBuilder() << "TEvUndelivered Cookie# " << ev->Cookie << Endl);
+ if (const auto it = InFlight.find(ev->Cookie); it != InFlight.end()) {
+ auto& [s2cIt, hash] = it->second;
+ Tentative.emplace(it->first, hash);
+ SessionToCookie.erase(s2cIt);
+ InFlight.erase(it);
+ IssueQueries();
+ }
+ }
+
+ STRICT_STFUNC(StateFunc,
+ fFunc(TEvents::THelloWorld::Pong, HandlePong);
+ hFunc(TEvInterconnect::TEvNodeConnected, Handle);
+ hFunc(TEvInterconnect::TEvNodeDisconnected, Handle);
+ hFunc(TEvents::TEvUndelivered, Handle);
+ cFunc(TEvents::TSystem::Wakeup, Subscribe);
+ )
+};
+
+class TRecipientActor : public TActor<TRecipientActor> {
+public:
+ TRecipientActor()
+ : TActor(&TThis::StateFunc)
+ {}
+
+ void HandlePing(TAutoPtr<IEventHandle>& ev) {
+ const TString& data = ev->GetChainBuffer()->GetString();
+ const TString& response = MD5::CalcRaw(data);
+ TActivationContext::Send(new IEventHandle(TEvents::THelloWorld::Pong, 0, ev->Sender, SelfId(),
+ MakeIntrusive<TEventSerializedData>(response, false), ev->Cookie));
+ }
+
+ STRICT_STFUNC(StateFunc,
+ fFunc(TEvents::THelloWorld::Ping, HandlePing);
+ )
+};
+
+Y_UNIT_TEST_SUITE(Interconnect) {
+
+ Y_UNIT_TEST(SessionContinuation) {
+ TTestICCluster cluster(2);
+ const TActorId recipient = cluster.RegisterActor(new TRecipientActor, 1);
+ cluster.RegisterActor(new TSenderActor(recipient), 2);
+ for (ui32 i = 0; i < 100; ++i) {
+ const ui32 nodeId = 1 + RandomNumber(2u);
+ const ui32 peerNodeId = 3 - nodeId;
+ const ui32 action = RandomNumber(3u);
+ auto *node = cluster.GetNode(nodeId);
+ TActorId proxyId = node->InterconnectProxy(peerNodeId);
+
+ switch (action) {
+ case 0:
+ node->Send(proxyId, new TEvInterconnect::TEvClosePeerSocket);
+ Cerr << (TStringBuilder() << "nodeId# " << nodeId << " peerNodeId# " << peerNodeId
+ << " TEvClosePeerSocket" << Endl);
+ break;
+
+ case 1:
+ node->Send(proxyId, new TEvInterconnect::TEvCloseInputSession);
+ Cerr << (TStringBuilder() << "nodeId# " << nodeId << " peerNodeId# " << peerNodeId
+ << " TEvCloseInputSession" << Endl);
+ break;
+
+ case 2:
+ node->Send(proxyId, new TEvInterconnect::TEvPoisonSession);
+ Cerr << (TStringBuilder() << "nodeId# " << nodeId << " peerNodeId# " << peerNodeId
+ << " TEvPoisonSession" << Endl);
+ break;
+
+ default:
+ Y_FAIL();
+ }
+
+ Sleep(TDuration::MilliSeconds(RandomNumber<ui32>(500) + 100));
+ }
+ }
+
+}
diff --git a/library/cpp/actors/interconnect/ut/large.cpp b/library/cpp/actors/interconnect/ut/large.cpp
new file mode 100644
index 0000000000..ba2a50c6f6
--- /dev/null
+++ b/library/cpp/actors/interconnect/ut/large.cpp
@@ -0,0 +1,85 @@
+#include "lib/ic_test_cluster.h"
+#include "lib/test_events.h"
+#include "lib/test_actors.h"
+
+#include <library/cpp/actors/interconnect/interconnect_tcp_proxy.h>
+
+#include <library/cpp/testing/unittest/tests_data.h>
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <util/system/event.h>
+#include <util/system/sanitizers.h>
+
+Y_UNIT_TEST_SUITE(LargeMessage) {
+ using namespace NActors;
+
+ class TProducer: public TActorBootstrapped<TProducer> {
+ const TActorId RecipientActorId;
+
+ public:
+ TProducer(const TActorId& recipientActorId)
+ : RecipientActorId(recipientActorId)
+ {}
+
+ void Bootstrap(const TActorContext& ctx) {
+ Become(&TThis::StateFunc);
+ ctx.Send(RecipientActorId, new TEvTest(1, "hello"), IEventHandle::FlagTrackDelivery, 1);
+ ctx.Send(RecipientActorId, new TEvTest(2, TString(128 * 1024 * 1024, 'X')), IEventHandle::FlagTrackDelivery, 2);
+ }
+
+ void Handle(TEvents::TEvUndelivered::TPtr ev, const TActorContext& ctx) {
+ if (ev->Cookie == 2) {
+ Cerr << "TEvUndelivered\n";
+ ctx.Send(RecipientActorId, new TEvTest(3, "hello"), IEventHandle::FlagTrackDelivery, 3);
+ }
+ }
+
+ STRICT_STFUNC(StateFunc,
+ HFunc(TEvents::TEvUndelivered, Handle)
+ )
+ };
+
+ class TConsumer : public TActorBootstrapped<TConsumer> {
+ TManualEvent& Done;
+ TActorId SessionId;
+
+ public:
+ TConsumer(TManualEvent& done)
+ : Done(done)
+ {
+ }
+
+ void Bootstrap(const TActorContext& /*ctx*/) {
+ Become(&TThis::StateFunc);
+ }
+
+ void Handle(TEvTest::TPtr ev, const TActorContext& /*ctx*/) {
+ const auto& record = ev->Get()->Record;
+ Cerr << "RECEIVED TEvTest\n";
+ if (record.GetSequenceNumber() == 1) {
+ Y_VERIFY(!SessionId);
+ SessionId = ev->InterconnectSession;
+ } else if (record.GetSequenceNumber() == 3) {
+ Y_VERIFY(SessionId != ev->InterconnectSession);
+ Done.Signal();
+ } else {
+ Y_FAIL("incorrect sequence number");
+ }
+ }
+
+ STRICT_STFUNC(StateFunc,
+ HFunc(TEvTest, Handle)
+ )
+ };
+
+ Y_UNIT_TEST(Test) {
+ TTestICCluster testCluster(2);
+
+ TManualEvent done;
+ TConsumer* consumer = new TConsumer(done);
+ const TActorId recp = testCluster.RegisterActor(consumer, 1);
+ testCluster.RegisterActor(new TProducer(recp), 2);
+ done.WaitI();
+ }
+
+}
diff --git a/library/cpp/actors/interconnect/ut/lib/ic_test_cluster.h b/library/cpp/actors/interconnect/ut/lib/ic_test_cluster.h
new file mode 100644
index 0000000000..2b6d27cd3f
--- /dev/null
+++ b/library/cpp/actors/interconnect/ut/lib/ic_test_cluster.h
@@ -0,0 +1,84 @@
+#pragma once
+
+#include "node.h"
+#include "interrupter.h"
+
+#include <library/cpp/actors/interconnect/interconnect_tcp_proxy.h>
+#include <library/cpp/actors/core/events.h>
+#include <library/cpp/testing/unittest/tests_data.h>
+
+#include <util/generic/noncopyable.h>
+
+class TTestICCluster: public TNonCopyable {
+public:
+ struct TTrafficInterrupterSettings {
+ TDuration RejectingTrafficTimeout;
+ double BandWidth;
+ bool Disconnect;
+ };
+
+private:
+ const ui32 NumNodes;
+ const TString Address = "::1";
+ TDuration DeadPeerTimeout = TDuration::Seconds(2);
+ NMonitoring::TDynamicCounterPtr Counters;
+ THashMap<ui32, THolder<TNode>> Nodes;
+ TList<TTrafficInterrupter> interrupters;
+ NActors::TChannelsConfig ChannelsConfig;
+ TPortManager PortManager;
+
+public:
+ TTestICCluster(ui32 numNodes = 1, NActors::TChannelsConfig channelsConfig = NActors::TChannelsConfig(),
+ TTrafficInterrupterSettings* tiSettings = nullptr)
+ : NumNodes(numNodes)
+ , Counters(new NMonitoring::TDynamicCounters)
+ , ChannelsConfig(channelsConfig)
+ {
+ THashMap<ui32, ui16> nodeToPortMap;
+ THashMap<ui32, THashMap<ui32, ui16>> specificNodePortMap;
+
+ for (ui32 i = 1; i <= NumNodes; ++i) {
+ nodeToPortMap.emplace(i, PortManager.GetPort());
+ }
+
+ if (tiSettings) {
+ ui32 nodeId;
+ ui16 listenPort;
+ ui16 forwardPort;
+ for (auto& item : nodeToPortMap) {
+ nodeId = item.first;
+ listenPort = item.second;
+ forwardPort = PortManager.GetPort();
+
+ specificNodePortMap[nodeId] = nodeToPortMap;
+ specificNodePortMap[nodeId].at(nodeId) = forwardPort;
+ interrupters.emplace_back(Address, listenPort, forwardPort, tiSettings->RejectingTrafficTimeout, tiSettings->BandWidth, tiSettings->Disconnect);
+ interrupters.back().Start();
+ }
+ }
+
+ for (ui32 i = 1; i <= NumNodes; ++i) {
+ auto& portMap = tiSettings ? specificNodePortMap[i] : nodeToPortMap;
+ Nodes.emplace(i, MakeHolder<TNode>(i, NumNodes, portMap, Address, Counters, DeadPeerTimeout, ChannelsConfig));
+ }
+ }
+
+ TNode* GetNode(ui32 id) {
+ return Nodes[id].Get();
+ }
+
+ ~TTestICCluster() {
+ }
+
+ TActorId RegisterActor(NActors::IActor* actor, ui32 nodeId) {
+ return Nodes[nodeId]->RegisterActor(actor);
+ }
+
+ TActorId InterconnectProxy(ui32 peerNodeId, ui32 nodeId) {
+ return Nodes[nodeId]->InterconnectProxy(peerNodeId);
+ }
+
+ void KillActor(ui32 nodeId, const TActorId& id) {
+ Nodes[nodeId]->Send(id, new NActors::TEvents::TEvPoisonPill);
+ }
+};
diff --git a/library/cpp/actors/interconnect/ut/lib/interrupter.h b/library/cpp/actors/interconnect/ut/lib/interrupter.h
new file mode 100644
index 0000000000..48851de2c5
--- /dev/null
+++ b/library/cpp/actors/interconnect/ut/lib/interrupter.h
@@ -0,0 +1,249 @@
+#pragma once
+
+#include <library/cpp/testing/unittest/tests_data.h>
+
+#include <util/network/sock.h>
+#include <util/network/poller.h>
+#include <util/system/thread.h>
+#include <util/system/hp_timer.h>
+#include <util/generic/list.h>
+#include <util/generic/set.h>
+#include <util/generic/vector.h>
+#include <util/generic/deque.h>
+#include <util/random/random.h>
+
+#include <iterator>
+
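+// Man-in-the-middle proxy thread for interconnect tests: accepts on
+// listenPort, forwards to forwardPort and back, and can shape the traffic by
+// throttling it to a given bandwidth, randomly dropping connections, or
+// periodically rejecting traffic for rejectingTrafficTimeout-sized intervals.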
+class TTrafficInterrupter
+ : public ISimpleThread {
+ const TString Address;
+ const ui16 ForwardPort;
+ TInet6StreamSocket ListenSocket;
+
+ struct TConnectionDescriptor;
+ struct TDelayedPacket {
+ TInet6StreamSocket* ForwardSocket = nullptr;
+ TVector<char> Data;
+ };
+ struct TCompare {
+ bool operator()(const std::pair<TInstant, TDelayedPacket>& x, const std::pair<TInstant, TDelayedPacket>& y) const {
+ return x.first > y.first;
+ }
+ };
+
+ struct TDirectedConnection {
+ TInet6StreamSocket* Source = nullptr;
+ TInet6StreamSocket* Destination = nullptr;
+ TList<TConnectionDescriptor>::iterator ListIterator;
+ TInstant Timestamp;
+ TPriorityQueue<std::pair<TInstant, TDelayedPacket>, TVector<std::pair<TInstant, TDelayedPacket>>, TCompare> DelayedQueue;
+
+ TDirectedConnection(TInet6StreamSocket* source, TInet6StreamSocket* destination)
+ : Source(source)
+ , Destination(destination)
+ {
+ }
+ };
+
+ struct TConnectionDescriptor {
+ std::unique_ptr<TInet6StreamSocket> FirstSocket;
+ std::unique_ptr<TInet6StreamSocket> SecondSocket;
+ TDirectedConnection ForwardConnection;
+ TDirectedConnection BackwardConnection;
+
+ TConnectionDescriptor(std::unique_ptr<TInet6StreamSocket> firstSock,
+ std::unique_ptr<TInet6StreamSocket> secondSock)
+ : FirstSocket(std::move(firstSock))
+ , SecondSocket(std::move(secondSock))
+ , ForwardConnection(FirstSocket.get(), SecondSocket.get())
+ , BackwardConnection(SecondSocket.get(), FirstSocket.get())
+ {
+ }
+ };
+
+ template <class It = TList<TConnectionDescriptor>::iterator>
+ class TCustomListIteratorCompare {
+ public:
+ bool operator()(const It& it1, const It& it2) const {
+ return (&(*it1) < &(*it2));
+ }
+ };
+
+ TList<TConnectionDescriptor> Connections;
+ TSet<TList<TConnectionDescriptor>::iterator, TCustomListIteratorCompare<>> DroppedConnections;
+
+public:
+ TTrafficInterrupter(TString address, ui16 listenPort, ui16 forwardPort, TDuration rejectingTrafficTimeout, double bandwidth, bool disconnect = true)
+ : Address(std::move(address))
+ , ForwardPort(forwardPort)
+ , ListenSocket()
+ , RejectingTrafficTimeout(rejectingTrafficTimeout)
+ , CurrentRejectingTimeout(rejectingTrafficTimeout)
+ , RejectingStateTimer()
+ , Bandwidth(bandwidth)
+ , Disconnect(disconnect)
+ , RejectingTraffic(false)
+ {
+ SetReuseAddressAndPort(ListenSocket);
+ TSockAddrInet6 addr(Address.data(), listenPort);
+ Y_VERIFY(ListenSocket.Bind(&addr) == 0);
+ Y_VERIFY(ListenSocket.Listen(5) == 0);
+
+ DelayTraffic = (Bandwidth != 0.0);
+
+ ForwardAddress.Reset(new TSockAddrInet6(Address.data(), ForwardPort));
+ const ui32 BufSize = DelayTraffic ? 4096 : 65536 + 4096;
+ Buf.resize(BufSize);
+ }
+
+ ~TTrafficInterrupter() {
+ AtomicSet(Running, 0);
+ this->Join();
+ }
+
+private:
+ TAtomic Running = 1;
+ TVector<char> Buf;
+ TSocketPoller SocketPoller;
+ THolder<TSockAddrInet6> ForwardAddress;
+ TVector<void*> Events;
+ TDuration RejectingTrafficTimeout;
+ TDuration CurrentRejectingTimeout;
+ TDuration DefaultPollTimeout = TDuration::MilliSeconds(100);
+ TDuration DisconnectTimeout = TDuration::MilliSeconds(100);
+ THPTimer RejectingStateTimer;
+ THPTimer DisconnectTimer;
+ double Bandwidth;
+ const bool Disconnect;
+ bool RejectingTraffic;
+ bool DelayTraffic;
+
+ void UpdateRejectingState() {
+ if (TDuration::Seconds(std::abs(RejectingStateTimer.Passed())) > CurrentRejectingTimeout) {
+ RejectingStateTimer.Reset();
+ CurrentRejectingTimeout = (RandomNumber<ui32>(2) ? RejectingTrafficTimeout + TDuration::Seconds(1.0) : RejectingTrafficTimeout - TDuration::Seconds(0.2));
+ RejectingTraffic = !RejectingTraffic;
+ }
+ }
+
+ void RandomlyDisconnect() {
+ if (TDuration::Seconds(std::abs(DisconnectTimer.Passed())) > DisconnectTimeout) {
+ DisconnectTimer.Reset();
+ if (RandomNumber<ui32>(100) > 90) {
+ if (!Connections.empty()) {
+ auto it = Connections.begin();
+ std::advance(it, RandomNumber<ui32>(Connections.size()));
+ SocketPoller.Unwait(static_cast<SOCKET>(*it->FirstSocket.get()));
+ SocketPoller.Unwait(static_cast<SOCKET>(*it->SecondSocket.get()));
+ Connections.erase(it);
+ }
+ }
+ }
+ }
+
+ void* ThreadProc() override {
+ int pollReadyCount = 0;
+ SocketPoller.WaitRead(static_cast<SOCKET>(ListenSocket), &ListenSocket);
+ Events.resize(10);
+
+ while (AtomicGet(Running)) {
+ if (RejectingTrafficTimeout != TDuration::Zero()) {
+ UpdateRejectingState();
+ }
+ if (Disconnect) {
+ RandomlyDisconnect();
+ }
+ if (!RejectingTraffic) {
+ TDuration timeout = DefaultPollTimeout;
+ auto updateTimeout = [&timeout](TDirectedConnection& conn) {
+ if (conn.DelayedQueue) {
+ timeout = Min(timeout, conn.DelayedQueue.top().first - TInstant::Now());
+ }
+ };
+ for (auto& it : Connections) {
+ updateTimeout(it.ForwardConnection);
+ updateTimeout(it.BackwardConnection);
+ }
+ pollReadyCount = SocketPoller.WaitT(Events.data(), Events.size(), timeout);
+ if (pollReadyCount > 0) {
+ for (int i = 0; i < pollReadyCount; i++) {
+ HandleSocketPollEvent(Events[i]);
+ }
+ for (auto it : DroppedConnections) {
+ Connections.erase(it);
+ }
+ DroppedConnections.clear();
+ }
+ }
+ if (DelayTraffic) { // process packets from DelayQueues
+ auto processDelayedPackages = [](TDirectedConnection& conn) {
+ while (!conn.DelayedQueue.empty()) {
+ auto& frontPackage = conn.DelayedQueue.top();
+ if (TInstant::Now() >= frontPackage.first) {
+ TInet6StreamSocket* sock = frontPackage.second.ForwardSocket;
+ if (sock) {
+ sock->Send(frontPackage.second.Data.data(), frontPackage.second.Data.size());
+ }
+ conn.DelayedQueue.pop();
+ } else {
+ break;
+ }
+ }
+ };
+ for (auto& it : Connections) {
+ processDelayedPackages(it.ForwardConnection);
+ processDelayedPackages(it.BackwardConnection);
+ }
+ }
+ }
+ ListenSocket.Close();
+ return nullptr;
+ }
+
+ void HandleSocketPollEvent(void* ev) {
+ if (ev == static_cast<void*>(&ListenSocket)) {
+ TSockAddrInet6 origin;
+ Connections.emplace_back(TConnectionDescriptor(std::unique_ptr<TInet6StreamSocket>(new TInet6StreamSocket), std::unique_ptr<TInet6StreamSocket>(new TInet6StreamSocket)));
+ int err = ListenSocket.Accept(Connections.back().FirstSocket.get(), &origin);
+ if (!err) {
+ err = Connections.back().SecondSocket->Connect(ForwardAddress.Get());
+ if (!err) {
+ Connections.back().ForwardConnection.ListIterator = --Connections.end();
+ Connections.back().BackwardConnection.ListIterator = --Connections.end();
+ SocketPoller.WaitRead(static_cast<SOCKET>(*Connections.back().FirstSocket), &Connections.back().ForwardConnection);
+ SocketPoller.WaitRead(static_cast<SOCKET>(*Connections.back().SecondSocket), &Connections.back().BackwardConnection);
+ } else {
+ Connections.back().FirstSocket->Close();
+ }
+ } else {
+ Connections.pop_back();
+ }
+ } else {
+ TDirectedConnection* directedConnection = static_cast<TDirectedConnection*>(ev);
+ int recvSize = 0;
+ do {
+ recvSize = directedConnection->Source->Recv(Buf.data(), Buf.size());
+ } while (recvSize == -EINTR);
+
+ if (recvSize > 0) {
+ if (DelayTraffic) {
+ // put packet into DelayQueue
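+ // Bandwidth is in bytes per second: recvSize / Bandwidth seconds, converted
+ // to microseconds below, is how long this packet is held back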
+ const TDuration baseDelay = TDuration::MicroSeconds(recvSize * 1e6 / Bandwidth);
+ const TInstant now = TInstant::Now();
+ directedConnection->Timestamp = Max(now, directedConnection->Timestamp) + baseDelay;
+ TDelayedPacket pkt;
+ pkt.ForwardSocket = directedConnection->Destination;
+ pkt.Data.resize(recvSize);
+ memcpy(pkt.Data.data(), Buf.data(), recvSize);
+ directedConnection->DelayedQueue.emplace(directedConnection->Timestamp, std::move(pkt));
+ } else {
+ directedConnection->Destination->Send(Buf.data(), recvSize);
+ }
+ } else {
+ SocketPoller.Unwait(static_cast<SOCKET>(*directedConnection->Source));
+ SocketPoller.Unwait(static_cast<SOCKET>(*directedConnection->Destination));
+ DroppedConnections.emplace(directedConnection->ListIterator);
+ }
+ }
+ }
+};
diff --git a/library/cpp/actors/interconnect/ut/lib/node.h b/library/cpp/actors/interconnect/ut/lib/node.h
new file mode 100644
index 0000000000..ff30b1445e
--- /dev/null
+++ b/library/cpp/actors/interconnect/ut/lib/node.h
@@ -0,0 +1,137 @@
+#pragma once
+
+#include <library/cpp/actors/core/actorsystem.h>
+#include <library/cpp/actors/core/executor_pool_basic.h>
+#include <library/cpp/actors/core/scheduler_basic.h>
+#include <library/cpp/actors/core/mailbox.h>
+#include <library/cpp/actors/dnsresolver/dnsresolver.h>
+
+#include <library/cpp/actors/interconnect/interconnect_tcp_server.h>
+#include <library/cpp/actors/interconnect/interconnect_tcp_proxy.h>
+#include <library/cpp/actors/interconnect/interconnect_proxy_wrapper.h>
+
+using namespace NActors;
+
+class TNode {
+ THolder<TActorSystem> ActorSystem;
+
+public:
+ TNode(ui32 nodeId, ui32 numNodes, const THashMap<ui32, ui16>& nodeToPort, const TString& address,
+ NMonitoring::TDynamicCounterPtr counters, TDuration deadPeerTimeout,
+ TChannelsConfig channelsSettings = TChannelsConfig(),
+ ui32 numDynamicNodes = 0, ui32 numThreads = 1) {
+ TActorSystemSetup setup;
+ setup.NodeId = nodeId;
+ setup.ExecutorsCount = 1;
+ setup.Executors.Reset(new TAutoPtr<IExecutorPool>[setup.ExecutorsCount]);
+ for (ui32 i = 0; i < setup.ExecutorsCount; ++i) {
+ setup.Executors[i].Reset(new TBasicExecutorPool(i, numThreads, 20 /* magic number */));
+ }
+ setup.Scheduler.Reset(new TBasicSchedulerThread());
+ const ui32 interconnectPoolId = 0;
+
+ auto common = MakeIntrusive<TInterconnectProxyCommon>();
+ common->NameserviceId = GetNameserviceActorId();
+ common->MonCounters = counters->GetSubgroup("nodeId", ToString(nodeId));
+ common->ChannelsConfig = channelsSettings;
+ common->ClusterUUID = "cluster";
+ common->AcceptUUID = {common->ClusterUUID};
+ common->TechnicalSelfHostName = address;
+ common->Settings.Handshake = TDuration::Seconds(1);
+ common->Settings.DeadPeer = deadPeerTimeout;
+ common->Settings.CloseOnIdle = TDuration::Minutes(1);
+ common->Settings.SendBufferDieLimitInMB = 512;
+ common->Settings.TotalInflightAmountOfData = 512 * 1024;
+ common->Settings.TCPSocketBufferSize = 2048 * 1024;
+
+ setup.Interconnect.ProxyActors.resize(numNodes + 1 - numDynamicNodes);
+ setup.Interconnect.ProxyWrapperFactory = CreateProxyWrapperFactory(common, interconnectPoolId);
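+ // proxies for static nodes are pre-created in the loop below; the last
+ // numDynamicNodes nodes get their proxies on demand via the wrapper factory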
+
+ for (ui32 i = 1; i <= numNodes; ++i) {
+ if (i == nodeId) {
+ // create listener actor for local node "nodeId"
+ setup.LocalServices.emplace_back(TActorId(), TActorSetupCmd(new TInterconnectListenerTCP(address,
+ nodeToPort.at(nodeId), common), TMailboxType::ReadAsFilled, interconnectPoolId));
+ } else if (i <= numNodes - numDynamicNodes) {
+ // create proxy actor to reach node "i"
+ setup.Interconnect.ProxyActors[i] = {new TInterconnectProxyTCP(i, common),
+ TMailboxType::ReadAsFilled, interconnectPoolId};
+ }
+ }
+
+ setup.LocalServices.emplace_back(MakePollerActorId(), TActorSetupCmd(CreatePollerActor(),
+ TMailboxType::ReadAsFilled, 0));
+
+ const TActorId loggerActorId(0, "logger");
+ constexpr ui32 LoggerComponentId = 410; // NKikimrServices::LOGGER
+
+ auto loggerSettings = MakeIntrusive<NLog::TSettings>(
+ loggerActorId,
+ (NLog::EComponent)LoggerComponentId,
+ NLog::PRI_INFO,
+ NLog::PRI_DEBUG,
+ 0U);
+
+ loggerSettings->Append(
+ NActorsServices::EServiceCommon_MIN,
+ NActorsServices::EServiceCommon_MAX,
+ NActorsServices::EServiceCommon_Name
+ );
+
+ constexpr ui32 WilsonComponentId = 430; // NKikimrServices::WILSON
+ static const TString WilsonComponentName = "WILSON";
+
+ loggerSettings->Append(
+ (NLog::EComponent)WilsonComponentId,
+ (NLog::EComponent)WilsonComponentId + 1,
+ [](NLog::EComponent) -> const TString & { return WilsonComponentName; });
+
+ // register nameserver table
+ auto names = MakeIntrusive<TTableNameserverSetup>();
+ for (ui32 i = 1; i <= numNodes; ++i) {
+ names->StaticNodeTable[i] = TTableNameserverSetup::TNodeInfo(address, address, nodeToPort.at(i));
+ }
+ setup.LocalServices.emplace_back(
+ NDnsResolver::MakeDnsResolverActorId(),
+ TActorSetupCmd(
+ NDnsResolver::CreateOnDemandDnsResolver(),
+ TMailboxType::ReadAsFilled, interconnectPoolId));
+ setup.LocalServices.emplace_back(GetNameserviceActorId(), TActorSetupCmd(
+ CreateNameserverTable(names, interconnectPoolId), TMailboxType::ReadAsFilled,
+ interconnectPoolId));
+
+ // register logger
+ setup.LocalServices.emplace_back(loggerActorId, TActorSetupCmd(new TLoggerActor(loggerSettings,
+ CreateStderrBackend(), counters->GetSubgroup("subsystem", "logger")),
+ TMailboxType::ReadAsFilled, interconnectPoolId));
+
+ auto sp = MakeHolder<TActorSystemSetup>(std::move(setup));
+ ActorSystem.Reset(new TActorSystem(sp, nullptr, loggerSettings));
+ ActorSystem->Start();
+ }
+
+ ~TNode() {
+ ActorSystem->Stop();
+ }
+
+ bool Send(const TActorId& recipient, IEventBase* ev) {
+ return ActorSystem->Send(recipient, ev);
+ }
+
+ TActorId RegisterActor(IActor* actor) {
+ return ActorSystem->Register(actor);
+ }
+
+ TActorId InterconnectProxy(ui32 peerNodeId) {
+ return ActorSystem->InterconnectProxy(peerNodeId);
+ }
+
+ void RegisterServiceActor(const TActorId& serviceId, IActor* actor) {
+ const TActorId actorId = ActorSystem->Register(actor);
+ ActorSystem->RegisterLocalService(serviceId, actorId);
+ }
+
+ TActorSystem *GetActorSystem() const {
+ return ActorSystem.Get();
+ }
+};
diff --git a/library/cpp/actors/interconnect/ut/lib/test_actors.h b/library/cpp/actors/interconnect/ut/lib/test_actors.h
new file mode 100644
index 0000000000..7591200471
--- /dev/null
+++ b/library/cpp/actors/interconnect/ut/lib/test_actors.h
@@ -0,0 +1,83 @@
+#pragma once
+
+namespace NActors {
+ class TSenderBaseActor: public TActorBootstrapped<TSenderBaseActor> {
+ protected:
+ const TActorId RecipientActorId;
+ const ui32 Preload;
+ ui64 SequenceNumber = 0;
+ ui32 InFlySize = 0;
+
+ public:
+ TSenderBaseActor(const TActorId& recipientActorId, ui32 preload = 1)
+ : RecipientActorId(recipientActorId)
+ , Preload(preload)
+ {
+ }
+
+ virtual ~TSenderBaseActor() {
+ }
+
+ virtual void Bootstrap(const TActorContext& ctx) {
+ Become(&TSenderBaseActor::StateFunc);
+ ctx.Send(ctx.ExecutorThread.ActorSystem->InterconnectProxy(RecipientActorId.NodeId()), new TEvInterconnect::TEvConnectNode);
+ }
+
+ virtual void SendMessagesIfPossible(const TActorContext& ctx) {
+ while (InFlySize < Preload) {
+ SendMessage(ctx);
+ }
+ }
+
+ virtual void SendMessage(const TActorContext& /*ctx*/) {
+ ++SequenceNumber;
+ }
+
+ virtual void Handle(TEvents::TEvUndelivered::TPtr& /*ev*/, const TActorContext& ctx) {
+ SendMessage(ctx);
+ }
+
+ virtual void Handle(TEvTestResponse::TPtr& /*ev*/, const TActorContext& ctx) {
+ SendMessagesIfPossible(ctx);
+ }
+
+ void Handle(TEvInterconnect::TEvNodeConnected::TPtr& /*ev*/, const TActorContext& ctx) {
+ SendMessagesIfPossible(ctx);
+ }
+
+ void Handle(TEvInterconnect::TEvNodeDisconnected::TPtr& /*ev*/, const TActorContext& /*ctx*/) {
+ }
+
+ virtual void Handle(TEvents::TEvPoisonPill::TPtr& /*ev*/, const TActorContext& ctx) {
+ Die(ctx);
+ }
+
+ virtual STRICT_STFUNC(StateFunc,
+ HFunc(TEvTestResponse, Handle)
+ HFunc(TEvents::TEvUndelivered, Handle)
+ HFunc(TEvents::TEvPoisonPill, Handle)
+ HFunc(TEvInterconnect::TEvNodeConnected, Handle)
+ HFunc(TEvInterconnect::TEvNodeDisconnected, Handle)
+ )
+ };
+
+ class TReceiverBaseActor: public TActor<TReceiverBaseActor> {
+ protected:
+ ui64 ReceivedCount = 0;
+
+ public:
+ TReceiverBaseActor()
+ : TActor(&TReceiverBaseActor::StateFunc)
+ {
+ }
+
+ virtual ~TReceiverBaseActor() {
+ }
+
+ virtual STRICT_STFUNC(StateFunc,
+ HFunc(TEvTest, Handle)
+ )
+
+ virtual void Handle(TEvTest::TPtr& /*ev*/, const TActorContext& /*ctx*/) {}
+ };
+}
diff --git a/library/cpp/actors/interconnect/ut/lib/test_events.h b/library/cpp/actors/interconnect/ut/lib/test_events.h
new file mode 100644
index 0000000000..cd0d9e0152
--- /dev/null
+++ b/library/cpp/actors/interconnect/ut/lib/test_events.h
@@ -0,0 +1,49 @@
+#pragma once
+
+#include <library/cpp/actors/interconnect/ut/protos/interconnect_test.pb.h>
+
+namespace NActors {
+ enum {
+ EvTest = EventSpaceBegin(TEvents::ES_PRIVATE),
+ EvTestChan,
+ EvTestSmall,
+ EvTestLarge,
+ EvTestResponse,
+ };
+
+ struct TEvTest : TEventPB<TEvTest, NInterconnectTest::TEvTest, EvTest> {
+ TEvTest() = default;
+
+ TEvTest(ui64 sequenceNumber, const TString& payload) {
+ Record.SetSequenceNumber(sequenceNumber);
+ Record.SetPayload(payload);
+ }
+ };
+
+ struct TEvTestLarge : TEventPB<TEvTestLarge, NInterconnectTest::TEvTestLarge, EvTestLarge> {
+ TEvTestLarge() = default;
+
+ TEvTestLarge(ui64 sequenceNumber, const TString& payload) {
+ Record.SetSequenceNumber(sequenceNumber);
+ Record.SetPayload(payload);
+ }
+ };
+
+ struct TEvTestSmall : TEventPB<TEvTestSmall, NInterconnectTest::TEvTestSmall, EvTestSmall> {
+ TEvTestSmall() = default;
+
+ TEvTestSmall(ui64 sequenceNumber, const TString& payload) {
+ Record.SetSequenceNumber(sequenceNumber);
+ Record.SetPayload(payload);
+ }
+ };
+
+ struct TEvTestResponse : TEventPB<TEvTestResponse, NInterconnectTest::TEvTestResponse, EvTestResponse> {
+ TEvTestResponse() = default;
+
+ TEvTestResponse(ui64 confirmedSequenceNumber) {
+ Record.SetConfirmedSequenceNumber(confirmedSequenceNumber);
+ }
+ };
+
+}
diff --git a/library/cpp/actors/interconnect/ut/lib/ya.make b/library/cpp/actors/interconnect/ut/lib/ya.make
new file mode 100644
index 0000000000..80f45f364f
--- /dev/null
+++ b/library/cpp/actors/interconnect/ut/lib/ya.make
@@ -0,0 +1,12 @@
+LIBRARY()
+
+OWNER(vkanaev)
+
+SRCS(
+ node.h
+ test_events.h
+ test_actors.h
+ ic_test_cluster.h
+)
+
+END()
diff --git a/library/cpp/actors/interconnect/ut/poller_actor_ut.cpp b/library/cpp/actors/interconnect/ut/poller_actor_ut.cpp
new file mode 100644
index 0000000000..23d846a2fd
--- /dev/null
+++ b/library/cpp/actors/interconnect/ut/poller_actor_ut.cpp
@@ -0,0 +1,264 @@
+#include <library/cpp/actors/interconnect/poller_actor.h>
+#include <library/cpp/actors/testlib/test_runtime.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <util/network/pair.h>
+#include <util/network/socket.h>
+
+using namespace NActors;
+
+class TTestSocket: public TSharedDescriptor {
+public:
+ explicit TTestSocket(SOCKET fd)
+ : Fd_(fd)
+ {
+ }
+
+ int GetDescriptor() override {
+ return Fd_;
+ }
+
+private:
+ SOCKET Fd_;
+};
+using TTestSocketPtr = TIntrusivePtr<TTestSocket>;
+
+// create a pair of connected, non-blocking sockets
+std::pair<TTestSocketPtr, TTestSocketPtr> NonBlockSockets() {
+ SOCKET fds[2];
+ SocketPair(fds);
+ SetNonBlock(fds[0]);
+ SetNonBlock(fds[1]);
+ return {MakeIntrusive<TTestSocket>(fds[0]), MakeIntrusive<TTestSocket>(fds[1])};
+}
+
+std::pair<TTestSocketPtr, TTestSocketPtr> TcpSockets() {
+ // create server (listening) socket
+ SOCKET server = socket(AF_INET, SOCK_STREAM, 0);
+ Y_VERIFY(server != -1, "socket() failed with %s", strerror(errno));
+
+ // bind it to local address with automatically picked port
+ sockaddr_in addr;
+ addr.sin_family = AF_INET;
+ addr.sin_port = 0;
+ addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ if (bind(server, (sockaddr*)&addr, sizeof(addr)) == -1) {
+ Y_FAIL("bind() failed with %s", strerror(errno));
+ } else if (listen(server, 1) == -1) {
+ Y_FAIL("listen() failed with %s", strerror(errno));
+ }
+
+ // obtain local address for client
+ socklen_t len = sizeof(addr);
+ if (getsockname(server, (sockaddr*)&addr, &len) == -1) {
+ Y_FAIL("getsockname() failed with %s", strerror(errno));
+ }
+
+ // create client socket
+ SOCKET client = socket(AF_INET, SOCK_STREAM, 0);
+ Y_VERIFY(client != -1, "socket() failed with %s", strerror(errno));
+
+ // connect to server
+ if (connect(client, (sockaddr*)&addr, len) == -1) {
+ Y_FAIL("connect() failed with %s", strerror(errno));
+ }
+
+ // accept connection from the other side
+ SOCKET accepted = accept(server, nullptr, nullptr);
+ Y_VERIFY(accepted != -1, "accept() failed with %s", strerror(errno));
+
+ // close server socket
+ closesocket(server);
+
+ return std::make_pair(MakeIntrusive<TTestSocket>(client), MakeIntrusive<TTestSocket>(accepted));
+}
+
+class TPollerActorTest: public TTestBase {
+ UNIT_TEST_SUITE(TPollerActorTest);
+ UNIT_TEST(Registration)
+ UNIT_TEST(ReadNotification)
+ UNIT_TEST(WriteNotification)
+ UNIT_TEST(HangupNotification)
+ UNIT_TEST_SUITE_END();
+
+public:
+ void SetUp() override {
+ ActorSystem_ = MakeHolder<TTestActorRuntimeBase>();
+ ActorSystem_->Initialize();
+
+ PollerId_ = ActorSystem_->Register(CreatePollerActor());
+
+ TDispatchOptions opts;
+ opts.FinalEvents.emplace_back(TEvents::TSystem::Bootstrap, 1);
+ ActorSystem_->DispatchEvents(opts);
+ }
+
+ void Registration() {
+ auto [s1, s2] = NonBlockSockets();
+ auto readerId = ActorSystem_->AllocateEdgeActor();
+ auto writerId = ActorSystem_->AllocateEdgeActor();
+
+ RegisterSocket(s1, readerId, writerId);
+
+        // the reader should receive an event right after socket registration
+ TPollerToken::TPtr token;
+ {
+ auto ev = ActorSystem_->GrabEdgeEvent<TEvPollerRegisterResult>(readerId);
+ token = ev->Get()->PollerToken;
+ }
+
+        // the writer should receive an event after socket registration, too
+ {
+ auto ev = ActorSystem_->GrabEdgeEvent<TEvPollerRegisterResult>(writerId);
+ UNIT_ASSERT_EQUAL(token, ev->Get()->PollerToken);
+ }
+ }
+
+ void ReadNotification() {
+ auto [r, w] = NonBlockSockets();
+ auto clientId = ActorSystem_->AllocateEdgeActor();
+ RegisterSocket(r, clientId, {});
+
+ // notification after registration
+ TPollerToken::TPtr token;
+ {
+ auto ev = ActorSystem_->GrabEdgeEvent<TEvPollerRegisterResult>(clientId);
+ token = ev->Get()->PollerToken;
+ }
+
+ char buf;
+
+        // no data is available for reading yet
+ UNIT_ASSERT(read(r->GetDescriptor(), &buf, sizeof(buf)) == -1);
+ UNIT_ASSERT(errno == EWOULDBLOCK);
+
+ // request read poll
+ token->Request(true, false);
+
+ // write data
+ UNIT_ASSERT(write(w->GetDescriptor(), "x", 1) == 1);
+
+        // notification arrives once the socket becomes readable
+ {
+ auto ev = ActorSystem_->GrabEdgeEvent<TEvPollerReady>(clientId);
+ UNIT_ASSERT_EQUAL(ev->Get()->Socket, r);
+ UNIT_ASSERT(ev->Get()->Read);
+ UNIT_ASSERT(!ev->Get()->Write);
+ }
+
+ // read data
+ UNIT_ASSERT(read(r->GetDescriptor(), &buf, sizeof(buf)) == 1);
+ UNIT_ASSERT_EQUAL('x', buf);
+
+ // no more data to read
+ UNIT_ASSERT(read(r->GetDescriptor(), &buf, sizeof(buf)) == -1);
+ UNIT_ASSERT(errno == EWOULDBLOCK);
+ }
+
+ void WriteNotification() {
+ auto [r, w] = TcpSockets();
+ auto clientId = ActorSystem_->AllocateEdgeActor();
+ SetNonBlock(w->GetDescriptor());
+ RegisterSocket(w, TActorId{}, clientId);
+
+ // notification after registration
+ TPollerToken::TPtr token;
+ {
+ auto ev = ActorSystem_->GrabEdgeEvent<TEvPollerRegisterResult>(clientId);
+ token = ev->Get()->PollerToken;
+ }
+
+ char buffer[4096];
+ memset(buffer, 'x', sizeof(buffer));
+
+ for (int i = 0; i < 1000; ++i) {
+ // write as much as possible to send buffer
+ ssize_t written = 0;
+ for (;;) {
+ ssize_t res = send(w->GetDescriptor(), buffer, sizeof(buffer), 0);
+ if (res > 0) {
+ written += res;
+ } else if (res == 0) {
+ UNIT_FAIL("unexpected zero return from send()");
+ } else {
+ UNIT_ASSERT(res == -1);
+ if (errno == EINTR) {
+ continue;
+ } else if (errno == EWOULDBLOCK || errno == EAGAIN) {
+ token->Request(false, true);
+ break;
+ } else {
+ UNIT_FAIL("unexpected error from send()");
+ }
+ }
+ }
+ Cerr << "written " << written << " bytes" << Endl;
+
+ // read all written data from the read end
+ for (;;) {
+ char buffer[4096];
+ ssize_t res = recv(r->GetDescriptor(), buffer, sizeof(buffer), 0);
+ if (res > 0) {
+ UNIT_ASSERT(written >= res);
+ written -= res;
+ if (!written) {
+ break;
+ }
+ } else if (res == 0) {
+ UNIT_FAIL("unexpected zero return from recv()");
+ } else {
+ UNIT_ASSERT(res == -1);
+ if (errno == EINTR) {
+ continue;
+ } else {
+ UNIT_FAIL("unexpected error from recv()");
+ }
+ }
+ }
+
+ // wait for notification after socket becomes writable again
+ {
+ auto ev = ActorSystem_->GrabEdgeEvent<TEvPollerReady>(clientId);
+ UNIT_ASSERT_EQUAL(ev->Get()->Socket, w);
+ UNIT_ASSERT(!ev->Get()->Read);
+ UNIT_ASSERT(ev->Get()->Write);
+ }
+ }
+ }
+
+ void HangupNotification() {
+ auto [r, w] = NonBlockSockets();
+ auto clientId = ActorSystem_->AllocateEdgeActor();
+ RegisterSocket(r, clientId, TActorId{});
+
+ // notification after registration
+ TPollerToken::TPtr token;
+ {
+ auto ev = ActorSystem_->GrabEdgeEvent<TEvPollerRegisterResult>(clientId);
+ token = ev->Get()->PollerToken;
+ }
+
+ token->Request(true, false);
+ ShutDown(w->GetDescriptor(), SHUT_RDWR);
+
+ // notification after peer shuts down its socket
+ {
+ auto ev = ActorSystem_->GrabEdgeEvent<TEvPollerReady>(clientId);
+ UNIT_ASSERT_EQUAL(ev->Get()->Socket, r);
+ UNIT_ASSERT(ev->Get()->Read);
+ }
+ }
+
+private:
+ void RegisterSocket(TTestSocketPtr socket, TActorId readActorId, TActorId writeActorId) {
+ auto ev = new TEvPollerRegister{socket, readActorId, writeActorId};
+ ActorSystem_->Send(new IEventHandle(PollerId_, TActorId{}, ev));
+ }
+
+private:
+ THolder<TTestActorRuntimeBase> ActorSystem_;
+ TActorId PollerId_;
+};
+
+UNIT_TEST_SUITE_REGISTRATION(TPollerActorTest);
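
The contract these tests exercise is one-shot polling: after draining a socket to EWOULDBLOCK the owner must re-arm the token and wait for the next TEvPollerReady before retrying. Roughly (sketch; fd and token as in the tests above):

    char buf[4096];
    ssize_t res = recv(fd, buf, sizeof(buf), 0);
    if (res == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
        token->Request(/*read=*/true, /*write=*/false);
        // now wait for TEvPollerReady instead of spinning on recv()
    }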
diff --git a/library/cpp/actors/interconnect/ut/protos/interconnect_test.proto b/library/cpp/actors/interconnect/ut/protos/interconnect_test.proto
new file mode 100644
index 0000000000..b9b2bd6a4e
--- /dev/null
+++ b/library/cpp/actors/interconnect/ut/protos/interconnect_test.proto
@@ -0,0 +1,25 @@
+package NInterconnectTest;
+
+message TEvTest {
+ optional uint64 SequenceNumber = 1;
+ optional bytes Payload = 2;
+}
+
+message TEvTestChan {
+ optional uint64 SequenceNumber = 1;
+ optional uint64 Payload = 2;
+}
+
+message TEvTestLarge {
+ optional uint64 SequenceNumber = 1;
+ optional bytes Payload = 2;
+}
+
+message TEvTestSmall {
+ optional uint64 SequenceNumber = 1;
+ optional bytes Payload = 2;
+}
+
+message TEvTestResponse {
+ optional uint64 ConfirmedSequenceNumber = 1;
+}
diff --git a/library/cpp/actors/interconnect/ut/protos/ya.make b/library/cpp/actors/interconnect/ut/protos/ya.make
new file mode 100644
index 0000000000..48a8cc129f
--- /dev/null
+++ b/library/cpp/actors/interconnect/ut/protos/ya.make
@@ -0,0 +1,11 @@
+PROTO_LIBRARY()
+
+OWNER(vkanaev)
+
+SRCS(
+ interconnect_test.proto
+)
+
+EXCLUDE_TAGS(GO_PROTO)
+
+END()
diff --git a/library/cpp/actors/interconnect/ut/ya.make b/library/cpp/actors/interconnect/ut/ya.make
new file mode 100644
index 0000000000..2f5b13352e
--- /dev/null
+++ b/library/cpp/actors/interconnect/ut/ya.make
@@ -0,0 +1,36 @@
+UNITTEST()
+
+OWNER(
+ alexvru
+ g:kikimr
+)
+
+IF (SANITIZER_TYPE == "thread")
+ TIMEOUT(1200)
+ SIZE(LARGE)
+ TAG(ya:fat)
+ELSE()
+ TIMEOUT(600)
+ SIZE(MEDIUM)
+ENDIF()
+
+SRCS(
+ channel_scheduler_ut.cpp
+ event_holder_pool_ut.cpp
+ interconnect_ut.cpp
+ large.cpp
+ poller_actor_ut.cpp
+ dynamic_proxy_ut.cpp
+)
+
+PEERDIR(
+ library/cpp/actors/core
+ library/cpp/actors/interconnect
+ library/cpp/actors/interconnect/ut/lib
+ library/cpp/actors/interconnect/ut/protos
+ library/cpp/actors/testlib
+ library/cpp/digest/md5
+ library/cpp/testing/unittest
+)
+
+END()
diff --git a/library/cpp/actors/interconnect/ut_fat/main.cpp b/library/cpp/actors/interconnect/ut_fat/main.cpp
new file mode 100644
index 0000000000..5d19bc3003
--- /dev/null
+++ b/library/cpp/actors/interconnect/ut_fat/main.cpp
@@ -0,0 +1,133 @@
+
+#include <library/cpp/actors/interconnect/interconnect_tcp_proxy.h>
+#include <library/cpp/actors/interconnect/ut/protos/interconnect_test.pb.h>
+#include <library/cpp/actors/interconnect/ut/lib/ic_test_cluster.h>
+#include <library/cpp/actors/interconnect/ut/lib/interrupter.h>
+#include <library/cpp/actors/interconnect/ut/lib/test_events.h>
+#include <library/cpp/actors/interconnect/ut/lib/test_actors.h>
+#include <library/cpp/actors/interconnect/ut/lib/node.h>
+
+#include <library/cpp/testing/unittest/tests_data.h>
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <util/network/sock.h>
+#include <util/network/poller.h>
+#include <util/system/atomic.h>
+#include <util/generic/set.h>
+
+Y_UNIT_TEST_SUITE(InterconnectUnstableConnection) {
+ using namespace NActors;
+
+ class TSenderActor: public TSenderBaseActor {
+ TDeque<ui64> InFly;
+ ui16 SendFlags;
+
+ public:
+ TSenderActor(const TActorId& recipientActorId, ui16 sendFlags)
+ : TSenderBaseActor(recipientActorId, 32)
+ , SendFlags(sendFlags)
+ {
+ }
+
+ ~TSenderActor() override {
+ Cerr << "Sent " << SequenceNumber << " messages\n";
+ }
+
+ void SendMessage(const TActorContext& ctx) override {
+ const ui32 flags = IEventHandle::MakeFlags(0, SendFlags);
+ const ui64 cookie = SequenceNumber;
+ const TString payload('@', RandomNumber<size_t>(65536) + 4096);
+ ctx.Send(RecipientActorId, new TEvTest(SequenceNumber, payload), flags, cookie);
+ InFly.push_back(SequenceNumber);
+ ++InFlySize;
+ ++SequenceNumber;
+ }
+
+ void Handle(TEvents::TEvUndelivered::TPtr& ev, const TActorContext& ctx) override {
+ auto record = std::find(InFly.begin(), InFly.end(), ev->Cookie);
+ if (SendFlags & IEventHandle::FlagGenerateUnsureUndelivered) {
+ if (record != InFly.end()) {
+ InFly.erase(record);
+ --InFlySize;
+ SendMessage(ctx);
+ }
+ } else {
+ Y_VERIFY(record != InFly.end());
+ }
+ }
+
+ void Handle(TEvTestResponse::TPtr& ev, const TActorContext& ctx) override {
+ Y_VERIFY(InFly);
+ const NInterconnectTest::TEvTestResponse& record = ev->Get()->Record;
+ Y_VERIFY(record.HasConfirmedSequenceNumber());
+ if (!(SendFlags & IEventHandle::FlagGenerateUnsureUndelivered)) {
+ while (record.GetConfirmedSequenceNumber() != InFly.front()) {
+ InFly.pop_front();
+ --InFlySize;
+ }
+ }
+ Y_VERIFY(record.GetConfirmedSequenceNumber() == InFly.front(), "got# %" PRIu64 " expected# %" PRIu64,
+ record.GetConfirmedSequenceNumber(), InFly.front());
+ InFly.pop_front();
+ --InFlySize;
+ SendMessagesIfPossible(ctx);
+ }
+ };
+
+ class TReceiverActor: public TReceiverBaseActor {
+ ui64 ReceivedCount = 0;
+ TNode* SenderNode = nullptr;
+
+ public:
+ TReceiverActor(TNode* senderNode)
+ : TReceiverBaseActor()
+ , SenderNode(senderNode)
+ {
+ }
+
+ void Handle(TEvTest::TPtr& ev, const TActorContext& /*ctx*/) override {
+ const NInterconnectTest::TEvTest& m = ev->Get()->Record;
+ Y_VERIFY(m.HasSequenceNumber());
+ Y_VERIFY(m.GetSequenceNumber() >= ReceivedCount, "got #%" PRIu64 " expected at least #%" PRIu64,
+ m.GetSequenceNumber(), ReceivedCount);
+ ++ReceivedCount;
+ SenderNode->Send(ev->Sender, new TEvTestResponse(m.GetSequenceNumber()));
+ }
+
+ ~TReceiverActor() override {
+ Cerr << "Received " << ReceivedCount << " messages\n";
+ }
+ };
+
+ Y_UNIT_TEST(InterconnectTestWithProxyUnsureUndelivered) {
+ ui32 numNodes = 2;
+ double bandWidth = 1000000;
+ ui16 flags = IEventHandle::FlagTrackDelivery | IEventHandle::FlagGenerateUnsureUndelivered;
+ TTestICCluster::TTrafficInterrupterSettings interrupterSettings{TDuration::Seconds(2), bandWidth, true};
+
+ TTestICCluster testCluster(numNodes, TChannelsConfig(), &interrupterSettings);
+
+ TReceiverActor* receiverActor = new TReceiverActor(testCluster.GetNode(1));
+ const TActorId recipient = testCluster.RegisterActor(receiverActor, 2);
+ TSenderActor* senderActor = new TSenderActor(recipient, flags);
+ testCluster.RegisterActor(senderActor, 1);
+
+ NanoSleep(30ULL * 1000 * 1000 * 1000);
+ }
+
+ Y_UNIT_TEST(InterconnectTestWithProxy) {
+ ui32 numNodes = 2;
+ double bandWidth = 1000000;
+ ui16 flags = IEventHandle::FlagTrackDelivery;
+ TTestICCluster::TTrafficInterrupterSettings interrupterSettings{TDuration::Seconds(2), bandWidth, true};
+
+ TTestICCluster testCluster(numNodes, TChannelsConfig(), &interrupterSettings);
+
+ TReceiverActor* receiverActor = new TReceiverActor(testCluster.GetNode(1));
+ const TActorId recipient = testCluster.RegisterActor(receiverActor, 2);
+ TSenderActor* senderActor = new TSenderActor(recipient, flags);
+ testCluster.RegisterActor(senderActor, 1);
+
+ NanoSleep(30ULL * 1000 * 1000 * 1000);
+ }
+}
diff --git a/library/cpp/actors/interconnect/ut_fat/ya.make b/library/cpp/actors/interconnect/ut_fat/ya.make
new file mode 100644
index 0000000000..6e58d08154
--- /dev/null
+++ b/library/cpp/actors/interconnect/ut_fat/ya.make
@@ -0,0 +1,25 @@
+UNITTEST()
+
+OWNER(
+ vkanaev
+ alexvru
+)
+
+SIZE(LARGE)
+
+TAG(ya:fat)
+
+SRCS(
+ main.cpp
+)
+
+PEERDIR(
+ library/cpp/actors/core
+ library/cpp/actors/interconnect
+ library/cpp/actors/interconnect/mock
+ library/cpp/actors/interconnect/ut/lib
+ library/cpp/actors/interconnect/ut/protos
+ library/cpp/testing/unittest
+)
+
+END()
diff --git a/library/cpp/actors/interconnect/watchdog_timer.h b/library/cpp/actors/interconnect/watchdog_timer.h
new file mode 100644
index 0000000000..c190105a59
--- /dev/null
+++ b/library/cpp/actors/interconnect/watchdog_timer.h
@@ -0,0 +1,68 @@
+#pragma once
+
+namespace NActors {
+ template <typename TEvent>
+ class TWatchdogTimer {
+ using TCallback = std::function<void()>;
+
+ const TDuration Timeout;
+ const TCallback Callback;
+
+ TInstant LastResetTimestamp;
+ TEvent* ExpectedEvent = nullptr;
+ ui32 Iteration = 0;
+
+ static constexpr ui32 NumIterationsBeforeFiring = 2;
+
+ public:
+ TWatchdogTimer(TDuration timeout, TCallback callback)
+ : Timeout(timeout)
+ , Callback(std::move(callback))
+ {
+ }
+
+ void Arm(const TActorIdentity& actor) {
+ if (Timeout != TDuration::Zero() && Timeout != TDuration::Max()) {
+ Schedule(Timeout, actor);
+ Reset();
+ }
+ }
+
+ void Reset() {
+ LastResetTimestamp = TActivationContext::Now();
+ }
+
+ void Disarm() {
+ ExpectedEvent = nullptr;
+ }
+
+ void operator()(typename TEvent::TPtr& ev) {
+ if (ev->Get() == ExpectedEvent) {
+ const TInstant now = TActivationContext::Now();
+ const TInstant barrier = LastResetTimestamp + Timeout;
+ if (now < barrier) {
+ // the time hasn't come yet
+ Schedule(barrier - now, TActorIdentity(ev->Recipient));
+ } else if (Iteration < NumIterationsBeforeFiring) {
+                // the time has come, but we still give the actor a chance to process pending messages and rearm the timer
+                ++Iteration;
+                TActivationContext::Send(ev.Release()); // put this event back into the queue once more
+ } else {
+ // no chance to disarm, fire callback
+ Callback();
+ ExpectedEvent = nullptr;
+ Iteration = 0;
+ }
+ }
+ }
+
+ private:
+ void Schedule(TDuration timeout, const TActorIdentity& actor) {
+ auto ev = MakeHolder<TEvent>();
+ ExpectedEvent = ev.Get();
+ Iteration = 0;
+ actor.Schedule(timeout, ev.Release());
+ }
+ };
+
+}
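
A rough usage sketch (the event type and actor here are hypothetical): the owning actor arms the timer with its own identity, routes the private timer event into operator(), and calls Reset() whenever the guarded activity shows signs of life:

    enum { EvPingWatchdog = EventSpaceBegin(TEvents::ES_PRIVATE) }; // hypothetical event id
    struct TEvPingWatchdog: TEventLocal<TEvPingWatchdog, EvPingWatchdog> {};

    class TSession: public TActorBootstrapped<TSession> {
        TWatchdogTimer<TEvPingWatchdog> Timer;

    public:
        TSession()
            : Timer(TDuration::Seconds(5), [] { Cerr << "watchdog fired" << Endl; })
        {}

        void Bootstrap() {
            Timer.Arm(SelfId()); // schedules the first TEvPingWatchdog
            Become(&TSession::StateFunc);
        }

        STATEFN(StateFunc) {
            switch (ev->GetTypeRewrite()) {
                hFunc(TEvPingWatchdog, Timer); // reschedules, grants grace passes, or fires
                // real traffic handlers would call Timer.Reset()
            }
        }
    };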
diff --git a/library/cpp/actors/interconnect/ya.make b/library/cpp/actors/interconnect/ya.make
new file mode 100644
index 0000000000..60d29b0fc0
--- /dev/null
+++ b/library/cpp/actors/interconnect/ya.make
@@ -0,0 +1,94 @@
+LIBRARY()
+
+OWNER(
+ ddoarn
+ alexvru
+ g:kikimr
+)
+
+NO_WSHADOW()
+
+IF (PROFILE_MEMORY_ALLOCATIONS)
+ CFLAGS(-DPROFILE_MEMORY_ALLOCATIONS)
+ENDIF()
+
+SRCS(
+ channel_scheduler.h
+ event_filter.h
+ event_holder_pool.h
+ events_local.h
+ interconnect_address.cpp
+ interconnect_address.h
+ interconnect_channel.cpp
+ interconnect_channel.h
+ interconnect_common.h
+ interconnect_counters.cpp
+ interconnect.h
+ interconnect_handshake.cpp
+ interconnect_handshake.h
+ interconnect_impl.h
+ interconnect_mon.cpp
+ interconnect_mon.h
+ interconnect_nameserver_dynamic.cpp
+ interconnect_nameserver_table.cpp
+ interconnect_proxy_wrapper.cpp
+ interconnect_proxy_wrapper.h
+ interconnect_resolve.cpp
+ interconnect_stream.cpp
+ interconnect_stream.h
+ interconnect_tcp_input_session.cpp
+ interconnect_tcp_proxy.cpp
+ interconnect_tcp_proxy.h
+ interconnect_tcp_server.cpp
+ interconnect_tcp_server.h
+ interconnect_tcp_session.cpp
+ interconnect_tcp_session.h
+ load.cpp
+ load.h
+ logging.h
+ packet.cpp
+ packet.h
+ poller_actor.cpp
+ poller_actor.h
+ poller.h
+ poller_tcp.cpp
+ poller_tcp.h
+ poller_tcp_unit.cpp
+ poller_tcp_unit.h
+ poller_tcp_unit_select.cpp
+ poller_tcp_unit_select.h
+ profiler.h
+ slowpoke_actor.h
+ types.cpp
+ types.h
+ watchdog_timer.h
+)
+
+IF (OS_LINUX)
+ SRCS(
+ poller_tcp_unit_epoll.cpp
+ poller_tcp_unit_epoll.h
+ )
+ENDIF()
+
+PEERDIR(
+ contrib/libs/libc_compat
+ contrib/libs/openssl
+ library/cpp/actors/core
+ library/cpp/actors/dnscachelib
+ library/cpp/actors/dnsresolver
+ library/cpp/actors/helpers
+ library/cpp/actors/prof
+ library/cpp/actors/protos
+ library/cpp/actors/util
+ library/cpp/digest/crc32c
+ library/cpp/json
+ library/cpp/lwtrace
+ library/cpp/monlib/dynamic_counters
+ library/cpp/monlib/metrics
+ library/cpp/monlib/service/pages/tablesorter
+ library/cpp/openssl/init
+ library/cpp/packedtypes
+)
+
+END()
diff --git a/library/cpp/actors/memory_log/memlog.cpp b/library/cpp/actors/memory_log/memlog.cpp
new file mode 100644
index 0000000000..8e6b46727d
--- /dev/null
+++ b/library/cpp/actors/memory_log/memlog.cpp
@@ -0,0 +1,367 @@
+#include "memlog.h"
+
+#include <library/cpp/actors/util/datetime.h>
+
+#include <util/system/info.h>
+#include <util/system/atomic.h>
+#include <util/system/align.h>
+
+#if (defined(_i386_) || defined(_x86_64_)) && defined(_linux_)
+#define HAVE_VDSO_GETCPU 1
+#include <contrib/libs/linuxvdso/interface.h>
+static int (*FastGetCpu)(unsigned* cpu, unsigned* node, void* unused);
+#endif
+
+#if defined(_unix_)
+#include <sched.h>
+#elif defined(_win_)
+#include <WinBase.h>
+#else
+#error NO IMPLEMENTATION FOR THE PLATFORM
+#endif
+
+const char TMemoryLog::DEFAULT_LAST_MARK[16] = {
+    'c', 'b', '7', 'B', '6', '8', 'a', '8',
+    'A', '5', '6', '1', '6', '4', '5', '\n',
+};
+
+const char TMemoryLog::CLEAR_MARK[16] = {
+    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
+    ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\n',
+};
+
+unsigned TMemoryLog::GetSelfCpu() noexcept {
+#if defined(_unix_)
+#if HAVE_VDSO_GETCPU
+ unsigned cpu;
+ if (Y_LIKELY(FastGetCpu != nullptr)) {
+ auto result = FastGetCpu(&cpu, nullptr, nullptr);
+ Y_VERIFY(result == 0);
+ return cpu;
+ } else {
+ return 0;
+ }
+
+#elif defined(_x86_64_) || defined(_i386_)
+
+#define CPUID(func, eax, ebx, ecx, edx) \
+ __asm__ __volatile__( \
+ "cpuid" \
+ : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) \
+ : "a"(func));
+
+ int a = 0, b = 0, c = 0, d = 0;
+ CPUID(0x1, a, b, c, d);
+ int acpiID = (b >> 24);
+ return acpiID;
+
+#elif defined(__GNUC__)
+ return sched_getcpu();
+#else
+ return 0;
+#endif
+
+#elif defined(_win_)
+ return GetCurrentProcessorNumber();
+#else
+ return 0;
+#endif
+}
+
+TMemoryLog* TMemoryLog::MemLogBuffer = nullptr;
+Y_POD_THREAD(TThread::TId)
+TMemoryLog::LogThreadId;
+char* TMemoryLog::LastMarkIsHere = nullptr;
+
+std::atomic<bool> TMemoryLog::PrintLastMark(true);
+
+TMemoryLog::TMemoryLog(size_t totalSize, size_t grainSize)
+ : GrainSize(grainSize)
+    , FreeGrains(totalSize / grainSize * 2)
+ , Buf(totalSize)
+{
+    Y_VERIFY(totalSize % grainSize == 0);
+    NumberOfGrains = totalSize / grainSize;
+
+ for (size_t i = 0; i < NumberOfGrains; ++i) {
+ new (GetGrain(i)) TGrain;
+ }
+
+ NumberOfCpus = NSystemInfo::NumberOfCpus();
+ Y_VERIFY(NumberOfGrains > NumberOfCpus);
+ ActiveGrains.Reset(new TGrain*[NumberOfCpus]);
+ for (size_t i = 0; i < NumberOfCpus; ++i) {
+ ActiveGrains[i] = GetGrain(i);
+ }
+
+ for (size_t i = NumberOfCpus; i < NumberOfGrains; ++i) {
+ FreeGrains.StubbornPush(GetGrain(i));
+ }
+
+#if HAVE_VDSO_GETCPU
+ auto vdsoFunc = (decltype(FastGetCpu))
+ NVdso::Function("__vdso_getcpu", "LINUX_2.6");
+ AtomicSet(FastGetCpu, vdsoFunc);
+#endif
+}
+
+void* TMemoryLog::GetWriteBuffer(size_t amount) noexcept {
+ // alignment required by NoCacheMemcpy
+ amount = AlignUp<size_t>(amount, MemcpyAlignment);
+
+ for (ui16 tries = MAX_GET_BUFFER_TRIES; tries-- > 0;) {
+ auto myCpu = GetSelfCpu();
+
+ TGrain* grain = AtomicGet(ActiveGrains[myCpu]);
+
+ if (grain != nullptr) {
+ auto mine = AtomicGetAndAdd(grain->WritePointer, amount);
+ if (mine + amount <= GrainSize - sizeof(TGrain)) {
+ return &grain->Data[mine];
+ }
+
+ if (!AtomicCas(&ActiveGrains[myCpu], 0, grain)) {
+ continue;
+ }
+
+ FreeGrains.StubbornPush(grain);
+ }
+
+ grain = (TGrain*)FreeGrains.Pop();
+
+ if (grain == nullptr) {
+ return nullptr;
+ }
+
+ grain->WritePointer = 0;
+
+ if (!AtomicCas(&ActiveGrains[myCpu], grain, 0)) {
+ FreeGrains.StubbornPush(grain);
+ continue;
+ }
+ }
+
+ return nullptr;
+}
+
+void ClearAlignedTail(char* tail) noexcept {
+ auto aligned = AlignUp(tail, TMemoryLog::MemcpyAlignment);
+ if (aligned > tail) {
+ memset(tail, 0, aligned - tail);
+ }
+}
+
+#if defined(_x86_64_) || defined(_i386_)
+#include <xmmintrin.h>
+// the main motivation is to avoid polluting the CPU cache
+NO_SANITIZE_THREAD
+void NoCacheMemcpy(char* dst, const char* src, size_t size) noexcept {
+ while (size >= sizeof(__m128) * 2) {
+ __m128 a = _mm_load_ps((float*)(src + 0 * sizeof(__m128)));
+ __m128 b = _mm_load_ps((float*)(src + 1 * sizeof(__m128)));
+ _mm_stream_ps((float*)(dst + 0 * sizeof(__m128)), a);
+ _mm_stream_ps((float*)(dst + 1 * sizeof(__m128)), b);
+
+ size -= sizeof(__m128) * 2;
+ src += sizeof(__m128) * 2;
+ dst += sizeof(__m128) * 2;
+ }
+ memcpy(dst, src, size);
+}
+
+NO_SANITIZE_THREAD
+void NoWCacheMemcpy(char* dst, const char* src, size_t size) noexcept {
+ constexpr ui16 ITEMS_COUNT = 1024;
+ alignas(TMemoryLog::MemcpyAlignment) __m128 buf[ITEMS_COUNT];
+ while (size >= sizeof(buf)) {
+ memcpy(&buf, src, sizeof(buf));
+
+ for (ui16 i = 0; i < ITEMS_COUNT; ++i) {
+ _mm_stream_ps((float*)dst, buf[i]);
+ dst += sizeof(__m128);
+ }
+
+ size -= sizeof(buf);
+ src += sizeof(buf);
+ }
+
+ memcpy(&buf, src, size);
+    // copying a few extra bytes here is harmless
+ size = AlignUp(size, sizeof(__m128));
+ for (ui16 i = 0; i < size / sizeof(__m128); ++i) {
+ _mm_stream_ps((float*)dst, buf[i]);
+ dst += sizeof(__m128);
+ }
+}
+
+#endif
+
+NO_SANITIZE_THREAD
+char* BareMemLogWrite(const char* begin, size_t msgSize, bool isLast) noexcept {
+ bool lastMark =
+ isLast && TMemoryLog::PrintLastMark.load(std::memory_order_acquire);
+ size_t amount = lastMark ? msgSize + TMemoryLog::LAST_MARK_SIZE : msgSize;
+
+ char* buffer = (char*)TMemoryLog::GetWriteBufferStatic(amount);
+ if (buffer == nullptr) {
+ return nullptr;
+ }
+
+#if defined(_x86_64_) || defined(_i386_)
+ if (AlignDown(begin, TMemoryLog::MemcpyAlignment) == begin) {
+ NoCacheMemcpy(buffer, begin, msgSize);
+ } else {
+ NoWCacheMemcpy(buffer, begin, msgSize);
+ }
+#else
+ memcpy(buffer, begin, msgSize);
+#endif
+
+ if (lastMark) {
+ TMemoryLog::ChangeLastMark(buffer + msgSize);
+ }
+
+ ClearAlignedTail(buffer + amount);
+ return buffer;
+}
+
+NO_SANITIZE_THREAD
+bool MemLogWrite(const char* begin, size_t msgSize, bool addLF) noexcept {
+ bool lastMark = TMemoryLog::PrintLastMark.load(std::memory_order_acquire);
+ size_t amount = lastMark ? msgSize + TMemoryLog::LAST_MARK_SIZE : msgSize;
+
+    // construct a prolog carrying the timestamp and the thread id
+ auto threadId = TMemoryLog::GetTheadId();
+
+ // alignment required by NoCacheMemcpy
+    // the size must match the snprintf format below
+ constexpr size_t prologSize = 48;
+ alignas(TMemoryLog::MemcpyAlignment) char prolog[prologSize + 1];
+ Y_VERIFY(AlignDown(&prolog, TMemoryLog::MemcpyAlignment) == &prolog);
+
+ int snprintfResult = snprintf(prolog, prologSize + 1,
+ "TS %020" PRIu64 " TI %020" PRIu64 " ", GetCycleCountFast(), threadId);
+
+ if (snprintfResult < 0) {
+ return false;
+ }
+ Y_VERIFY(snprintfResult == prologSize);
+
+ amount += prologSize;
+ if (addLF) {
+ ++amount; // add 1 byte for \n at the end of the message
+ }
+
+ char* buffer = (char*)TMemoryLog::GetWriteBufferStatic(amount);
+ if (buffer == nullptr) {
+ return false;
+ }
+
+#if defined(_x86_64_) || defined(_i386_)
+    // warning: copy the prolog first so that the prolog's tail bytes
+    // cannot corrupt the beginning of the message
+ NoCacheMemcpy(buffer, prolog, prologSize);
+ if (AlignDown(begin + prologSize, TMemoryLog::MemcpyAlignment) == begin + prologSize) {
+ NoCacheMemcpy(buffer + prologSize, begin, msgSize);
+ } else {
+ NoWCacheMemcpy(buffer + prologSize, begin, msgSize);
+ }
+#else
+ memcpy(buffer, prolog, prologSize);
+ memcpy(buffer + prologSize, begin, msgSize);
+#endif
+
+ if (addLF) {
+ buffer[prologSize + msgSize] = '\n';
+ }
+
+ if (lastMark) {
+ TMemoryLog::ChangeLastMark(buffer + prologSize + msgSize + (int)addLF);
+ }
+
+ ClearAlignedTail(buffer + amount);
+ return true;
+}
+
+NO_SANITIZE_THREAD
+void TMemoryLog::ChangeLastMark(char* buffer) noexcept {
+ memcpy(buffer, DEFAULT_LAST_MARK, LAST_MARK_SIZE);
+ auto oldMark = AtomicSwap(&LastMarkIsHere, buffer);
+ if (Y_LIKELY(oldMark != nullptr)) {
+ memcpy(oldMark, CLEAR_MARK, LAST_MARK_SIZE);
+ }
+ if (AtomicGet(LastMarkIsHere) != buffer) {
+ memcpy(buffer, CLEAR_MARK, LAST_MARK_SIZE);
+ AtomicBarrier();
+ }
+}
+
+bool MemLogVPrintF(const char* format, va_list params) noexcept {
+ auto logger = TMemoryLog::GetMemoryLogger();
+ if (logger == nullptr) {
+ return false;
+ }
+
+ auto threadId = TMemoryLog::GetTheadId();
+
+ // alignment required by NoCacheMemcpy
+ alignas(TMemoryLog::MemcpyAlignment) char buf[TMemoryLog::MAX_MESSAGE_SIZE];
+ Y_VERIFY(AlignDown(&buf, TMemoryLog::MemcpyAlignment) == &buf);
+
+ int prologSize = snprintf(buf,
+ TMemoryLog::MAX_MESSAGE_SIZE - 2,
+ "TS %020" PRIu64 " TI %020" PRIu64 " ",
+ GetCycleCountFast(),
+ threadId);
+
+ if (Y_UNLIKELY(prologSize < 0)) {
+ return false;
+ }
+ Y_VERIFY((ui32)prologSize <= TMemoryLog::MAX_MESSAGE_SIZE);
+
+ int add = vsnprintf(
+ &buf[prologSize],
+ TMemoryLog::MAX_MESSAGE_SIZE - prologSize - 2,
+ format, params);
+
+ if (Y_UNLIKELY(add < 0)) {
+ return false;
+ }
+ Y_VERIFY(add >= 0);
+ auto totalSize = prologSize + add;
+
+ buf[totalSize++] = '\n';
+ Y_VERIFY((ui32)totalSize <= TMemoryLog::MAX_MESSAGE_SIZE);
+
+ return BareMemLogWrite(buf, totalSize) != nullptr;
+}
diff --git a/library/cpp/actors/memory_log/memlog.h b/library/cpp/actors/memory_log/memlog.h
new file mode 100644
index 0000000000..2aa27272a6
--- /dev/null
+++ b/library/cpp/actors/memory_log/memlog.h
@@ -0,0 +1,211 @@
+#pragma once
+
+#include <library/cpp/threading/queue/mpmc_unordered_ring.h>
+#include <util/generic/string.h>
+#include <util/string/printf.h>
+#include <util/system/datetime.h>
+#include <util/system/thread.h>
+#include <util/system/types.h>
+#include <util/system/atomic.h>
+#include <util/system/align.h>
+#include <util/system/tls.h>
+
+#include <atomic>
+#include <cstdio>
+
+#ifdef _win_
+#include <util/system/winint.h>
+#endif
+
+#ifndef NO_SANITIZE_THREAD
+#define NO_SANITIZE_THREAD
+#if defined(__has_feature)
+#if __has_feature(thread_sanitizer)
+#undef NO_SANITIZE_THREAD
+#define NO_SANITIZE_THREAD __attribute__((no_sanitize_thread))
+#endif
+#endif
+#endif
+
+class TMemoryLog {
+public:
+ static constexpr size_t DEFAULT_TOTAL_SIZE = 10 * 1024 * 1024;
+ static constexpr size_t DEFAULT_GRAIN_SIZE = 1024 * 64;
+ static constexpr size_t MAX_MESSAGE_SIZE = 1024;
+ static constexpr ui16 MAX_GET_BUFFER_TRIES = 4;
+ static constexpr ui16 MemcpyAlignment = 16;
+
+ // search for cb7B68a8A561645
+ static const char DEFAULT_LAST_MARK[16];
+ static const char CLEAR_MARK[16];
+
+ static constexpr size_t LAST_MARK_SIZE = sizeof(DEFAULT_LAST_MARK);
+
+ inline static TMemoryLog* GetMemoryLogger() noexcept {
+ return AtomicGet(MemLogBuffer);
+ }
+
+ void* GetWriteBuffer(size_t amount) noexcept;
+
+ inline static void* GetWriteBufferStatic(size_t amount) noexcept {
+ auto logger = GetMemoryLogger();
+ if (logger == nullptr) {
+ return nullptr;
+ }
+ return logger->GetWriteBuffer(amount);
+ }
+
+ size_t GetGlobalBufferSize() const noexcept {
+ return Buf.GetSize();
+ }
+
+ inline static void CreateMemoryLogBuffer(
+ size_t totalSize = DEFAULT_TOTAL_SIZE,
+ size_t grainSize = DEFAULT_GRAIN_SIZE)
+ Y_COLD {
+ if (AtomicGet(MemLogBuffer) != nullptr) {
+ return;
+ }
+
+ AtomicSet(MemLogBuffer, new TMemoryLog(totalSize, grainSize));
+ }
+
+ static std::atomic<bool> PrintLastMark;
+
+ // buffer must be at least 16 bytes
+ static void ChangeLastMark(char* buffer) noexcept;
+
+ inline static TThread::TId GetTheadId() noexcept {
+ if (LogThreadId == 0) {
+ LogThreadId = TThread::CurrentThreadId();
+ }
+ return LogThreadId;
+ }
+
+private:
+ TMemoryLog(size_t totalSize, size_t grainSize) Y_COLD;
+
+ struct TGrain {
+ TAtomic WritePointer = 0;
+ char Padding[MemcpyAlignment - sizeof(TAtomic)];
+ char Data[];
+ };
+
+ size_t NumberOfCpus;
+ size_t GrainSize;
+ size_t NumberOfGrains;
+ TArrayPtr<TGrain*> ActiveGrains;
+ NThreading::TMPMCUnorderedRing FreeGrains;
+
+ TGrain* GetGrain(size_t grainIndex) const noexcept {
+ return (TGrain*)((char*)GetGlobalBuffer() + GrainSize * grainIndex);
+ }
+
+ class TMMapArea {
+ public:
+ TMMapArea(size_t amount) Y_COLD {
+ MMap(amount);
+ }
+
+ TMMapArea(const TMMapArea&) = delete;
+ TMMapArea& operator=(const TMMapArea& copy) = delete;
+
+ TMMapArea(TMMapArea&& move) Y_COLD {
+ BufPtr = move.BufPtr;
+ Size = move.Size;
+
+ move.BufPtr = nullptr;
+ move.Size = 0;
+ }
+
+ TMMapArea& operator=(TMMapArea&& move) Y_COLD {
+ BufPtr = move.BufPtr;
+ Size = move.Size;
+
+ move.BufPtr = nullptr;
+ move.Size = 0;
+ return *this;
+ }
+
+ void Reset(size_t amount) Y_COLD {
+ MUnmap();
+ MMap(amount);
+ }
+
+ ~TMMapArea() noexcept Y_COLD {
+ MUnmap();
+ }
+
+ size_t GetSize() const noexcept {
+ return Size;
+ }
+
+ void* GetPtr() const noexcept {
+ return BufPtr;
+ }
+
+ private:
+ void* BufPtr;
+ size_t Size;
+#ifdef _win_
+ HANDLE Mapping;
+#endif
+
+ void MMap(size_t amount);
+ void MUnmap();
+ };
+
+ TMMapArea Buf;
+
+ void* GetGlobalBuffer() const noexcept {
+ return Buf.GetPtr();
+ }
+
+ static unsigned GetSelfCpu() noexcept;
+
+ static TMemoryLog* MemLogBuffer;
+ static Y_POD_THREAD(TThread::TId) LogThreadId;
+ static char* LastMarkIsHere;
+};
+
+// there is no point in sanitizing this function
+NO_SANITIZE_THREAD
+char* BareMemLogWrite(
+ const char* begin, size_t msgSize, bool isLast = true) noexcept;
+
+// there is no point in sanitizing this function
+NO_SANITIZE_THREAD
+bool MemLogWrite(
+ const char* begin, size_t msgSize, bool addLF = false) noexcept;
+
+Y_WRAPPER inline bool MemLogWrite(const char* begin, const char* end) noexcept {
+ if (end <= begin) {
+ return false;
+ }
+
+ size_t msgSize = end - begin;
+ return MemLogWrite(begin, msgSize);
+}
+
+template <typename TObj>
+bool MemLogWriteStruct(const TObj* obj) noexcept {
+ auto begin = (const char*)(const void*)obj;
+ return MemLogWrite(begin, begin + sizeof(TObj));
+}
+
+Y_PRINTF_FORMAT(1, 0)
+bool MemLogVPrintF(const char* format, va_list params) noexcept;
+
+Y_PRINTF_FORMAT(1, 2)
+Y_WRAPPER
+inline bool MemLogPrintF(const char* format, ...) noexcept {
+ va_list params;
+ va_start(params, format);
+ auto result = MemLogVPrintF(format, params);
+ va_end(params);
+ return result;
+}
+
+Y_WRAPPER inline bool MemLogWriteNullTerm(const char* str) noexcept {
+ return MemLogWrite(str, strlen(str));
+}
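
A minimal usage sketch of the API above: create the global ring once, then write from any thread; all writers fail fast when no free grain is available:

    TMemoryLog::CreateMemoryLogBuffer();   // 10 MB total / 64 KB grains by default
    MemLogWriteNullTerm("plain message");  // TS/TI prolog + the bytes as-is
    MemLogPrintF("queue depth %d", 42);    // printf-style, '\n' appended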
diff --git a/library/cpp/actors/memory_log/mmap.cpp b/library/cpp/actors/memory_log/mmap.cpp
new file mode 100644
index 0000000000..201998d343
--- /dev/null
+++ b/library/cpp/actors/memory_log/mmap.cpp
@@ -0,0 +1,63 @@
+#include "memlog.h"
+
+#if defined(_unix_)
+#include <sys/mman.h>
+#elif defined(_win_)
+#include <util/system/winint.h>
+#else
+#error NO IMPLEMENTATION FOR THE PLATFORM
+#endif
+
+void TMemoryLog::TMMapArea::MMap(size_t amount) {
+ Y_VERIFY(amount > 0);
+
+#if defined(_unix_)
+ constexpr int mmapProt = PROT_READ | PROT_WRITE;
+#if defined(_linux_)
+ constexpr int mmapFlags = MAP_PRIVATE | MAP_ANON | MAP_POPULATE;
+#else
+ constexpr int mmapFlags = MAP_PRIVATE | MAP_ANON;
+#endif
+
+ BufPtr = ::mmap(nullptr, amount, mmapProt, mmapFlags, -1, 0);
+ if (BufPtr == MAP_FAILED) {
+ throw std::bad_alloc();
+ }
+
+#elif defined(_win_)
+ Mapping = ::CreateFileMapping(
+ (HANDLE)-1, nullptr, PAGE_READWRITE, 0, amount, nullptr);
+ if (Mapping == NULL) {
+ throw std::bad_alloc();
+ }
+ BufPtr = ::MapViewOfFile(Mapping, FILE_MAP_WRITE, 0, 0, amount);
+ if (BufPtr == NULL) {
+ throw std::bad_alloc();
+ }
+#endif
+
+ Size = amount;
+}
+
+void TMemoryLog::TMMapArea::MUnmap() {
+ if (BufPtr == nullptr) {
+ return;
+ }
+
+#if defined(_unix_)
+ int result = ::munmap(BufPtr, Size);
+ Y_VERIFY(result == 0);
+
+#elif defined(_win_)
+ BOOL result = ::UnmapViewOfFile(BufPtr);
+ Y_VERIFY(result != 0);
+
+ result = ::CloseHandle(Mapping);
+ Y_VERIFY(result != 0);
+
+ Mapping = 0;
+#endif
+
+ BufPtr = nullptr;
+ Size = 0;
+}
diff --git a/library/cpp/actors/memory_log/ya.make b/library/cpp/actors/memory_log/ya.make
new file mode 100644
index 0000000000..d89d5db4d7
--- /dev/null
+++ b/library/cpp/actors/memory_log/ya.make
@@ -0,0 +1,19 @@
+LIBRARY()
+
+OWNER(
+ agri
+ g:kikimr
+)
+
+SRCS(
+ memlog.cpp
+ memlog.h
+ mmap.cpp
+)
+
+PEERDIR(
+ library/cpp/threading/queue
+ contrib/libs/linuxvdso
+)
+
+END()
diff --git a/library/cpp/actors/prof/tag.cpp b/library/cpp/actors/prof/tag.cpp
new file mode 100644
index 0000000000..9ccf03e1a9
--- /dev/null
+++ b/library/cpp/actors/prof/tag.cpp
@@ -0,0 +1,119 @@
+#include "tag.h"
+#include "tcmalloc.h"
+
+#include <library/cpp/charset/ci_string.h>
+#include <library/cpp/containers/atomizer/atomizer.h>
+#include <library/cpp/malloc/api/malloc.h>
+
+#if defined(PROFILE_MEMORY_ALLOCATIONS)
+#include <library/cpp/lfalloc/dbg_info/dbg_info.h>
+#include <library/cpp/ytalloc/api/ytalloc.h>
+#endif
+
+#include <util/generic/singleton.h>
+#include <util/generic/string.h>
+#include <util/generic/vector.h>
+#include <util/system/mutex.h>
+
+namespace NProfiling {
+ class TStringAtoms {
+ private:
+ TMutex Mutex;
+ atomizer<ci_hash, ci_equal_to> Tags;
+
+ public:
+ static TStringAtoms& Instance() {
+ return *Singleton<TStringAtoms>();
+ }
+
+ ui32 MakeTag(const char* s) {
+ Y_VERIFY(s);
+ with_lock (Mutex) {
+ return Tags.string_to_atom(s);
+ }
+ }
+
+ ui32 MakeTags(const TVector<const char*>& ss) {
+ Y_VERIFY(ss);
+ with_lock (Mutex) {
+ ui32 baseTag = Tags.string_to_atom(ss[0]);
+ ui32 nextTag = baseTag + 1;
+ for (auto i = ss.begin() + 1; i != ss.end(); ++i, ++nextTag) {
+ Y_VERIFY(*i);
+ ui32 ctag = Tags.string_to_atom(*i);
+ Y_VERIFY(ctag == nextTag);
+ }
+ return baseTag;
+ }
+ }
+
+ const char* GetTag(ui32 tag) const {
+ with_lock (Mutex) {
+ return Tags.get_atom_name(tag);
+ }
+ }
+
+ size_t GetTagsCount() const {
+ with_lock (Mutex) {
+ return Tags.size();
+ }
+ }
+ };
+
+ ui32 MakeTag(const char* s) {
+ return TStringAtoms::Instance().MakeTag(s);
+ }
+
+ ui32 MakeTags(const TVector<const char*>& ss) {
+ return TStringAtoms::Instance().MakeTags(ss);
+ }
+
+ const char* GetTag(ui32 tag) {
+ return TStringAtoms::Instance().GetTag(tag);
+ }
+
+ size_t GetTagsCount() {
+ return TStringAtoms::Instance().GetTagsCount();
+ }
+
+ static ui32 SetThreadAllocTag_Default(ui32 tag) {
+ Y_UNUSED(tag);
+ return 0;
+ }
+
+#if defined(PROFILE_MEMORY_ALLOCATIONS)
+ static ui32 SetThreadAllocTag_YT(ui32 tag) {
+ auto prev = NYT::NYTAlloc::GetCurrentMemoryTag();
+ NYT::NYTAlloc::SetCurrentMemoryTag(tag);
+ return prev;
+ }
+
+ static TSetThreadAllocTag* SetThreadAllocTagFn() {
+ const auto& info = NMalloc::MallocInfo();
+
+ TStringBuf name(info.Name);
+ if (name.StartsWith("lf")) {
+ return (TSetThreadAllocTag*)NAllocDbg::SetThreadAllocTag;
+ } else if (name.StartsWith("yt")) {
+ return SetThreadAllocTag_YT;
+ } else if (name.StartsWith("tc")) {
+ return SetTCMallocThreadAllocTag;
+ } else {
+ return SetThreadAllocTag_Default;
+ }
+ }
+#else
+ static TSetThreadAllocTag* SetThreadAllocTagFn() {
+ const auto& info = NMalloc::MallocInfo();
+
+ TStringBuf name(info.Name);
+ if (name.StartsWith("tc")) {
+ return SetTCMallocThreadAllocTag;
+ } else {
+ return SetThreadAllocTag_Default;
+ }
+ }
+#endif
+
+ TSetThreadAllocTag* SetThreadAllocTag = SetThreadAllocTagFn();
+}
diff --git a/library/cpp/actors/prof/tag.h b/library/cpp/actors/prof/tag.h
new file mode 100644
index 0000000000..357e264a22
--- /dev/null
+++ b/library/cpp/actors/prof/tag.h
@@ -0,0 +1,73 @@
+#pragma once
+
+#include <util/generic/fwd.h>
+
+/*
+  Common registry of tags for the memory profiler.
+  Register a new tag with MakeTag using a unique string.
+  Apply registered tags with the SetThreadAllocTag function of the allocator API.
+*/
+
+namespace NProfiling {
+ ui32 MakeTag(const char* s);
+
+    // All strings must be unique (not yet atomized); Y_VERIFY inside checks the atoms come out contiguous.
+ ui32 MakeTags(const TVector<const char*>& ss);
+
+ const char* GetTag(ui32 tag);
+ size_t GetTagsCount();
+
+ using TSetThreadAllocTag = ui32(ui32 tag);
+ extern TSetThreadAllocTag* SetThreadAllocTag;
+
+ class TMemoryTagScope {
+ public:
+ explicit TMemoryTagScope(ui32 tag)
+ : RestoreTag(SetThreadAllocTag(tag))
+ {
+ }
+
+ explicit TMemoryTagScope(const char* tagName) {
+ ui32 newTag = MakeTag(tagName);
+ RestoreTag = SetThreadAllocTag(newTag);
+ }
+
+ TMemoryTagScope(TMemoryTagScope&& move)
+ : RestoreTag(move.RestoreTag)
+ , Released(move.Released)
+ {
+ move.Released = true;
+ }
+
+ TMemoryTagScope& operator=(TMemoryTagScope&& move) {
+ RestoreTag = move.RestoreTag;
+ Released = move.Released;
+ move.Released = true;
+ return *this;
+ }
+
+ static void Reset(ui32 tag) {
+ SetThreadAllocTag(tag);
+ }
+
+ void Release() {
+ if (!Released) {
+ SetThreadAllocTag(RestoreTag);
+ Released = true;
+ }
+ }
+
+ ~TMemoryTagScope() {
+ if (!Released) {
+ SetThreadAllocTag(RestoreTag);
+ }
+ }
+
+ protected:
+ TMemoryTagScope(const TMemoryTagScope&) = delete;
+ void operator=(const TMemoryTagScope&) = delete;
+
+ ui32 RestoreTag = 0;
+ bool Released = false;
+ };
+}
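
Typical usage (sketch): intern the tag once, then attribute allocations within a scope; with allocators that do not support tags, SetThreadAllocTag degrades to a no-op:

    static const ui32 ParserTag = NProfiling::MakeTag("Parser");

    void ParseBlock() {
        NProfiling::TMemoryTagScope scope(ParserTag);
        // allocations on this thread are attributed to "Parser" until the scope ends
    }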
diff --git a/library/cpp/actors/prof/tcmalloc.cpp b/library/cpp/actors/prof/tcmalloc.cpp
new file mode 100644
index 0000000000..3d4f203dbb
--- /dev/null
+++ b/library/cpp/actors/prof/tcmalloc.cpp
@@ -0,0 +1,32 @@
+#include "tcmalloc.h"
+
+#include <contrib/libs/tcmalloc/tcmalloc/malloc_extension.h>
+
+namespace NProfiling {
+
+static thread_local ui32 AllocationTag = 0;
+
+static struct TInitTCMallocCallbacks {
+ static void* CreateTag() {
+ return reinterpret_cast<void*>(AllocationTag);
+ }
+ static void* CopyTag(void* tag) {
+ return tag;
+ }
+ static void DestroyTag(void* tag) {
+ Y_UNUSED(tag);
+ }
+
+ TInitTCMallocCallbacks() {
+ tcmalloc::MallocExtension::SetSampleUserDataCallbacks(
+ CreateTag, CopyTag, DestroyTag);
+ }
+} InitTCMallocCallbacks;
+
+ui32 SetTCMallocThreadAllocTag(ui32 tag) {
+ ui32 prev = AllocationTag;
+ AllocationTag = tag;
+ return prev;
+}
+
+}
diff --git a/library/cpp/actors/prof/tcmalloc.h b/library/cpp/actors/prof/tcmalloc.h
new file mode 100644
index 0000000000..659fb4eaf3
--- /dev/null
+++ b/library/cpp/actors/prof/tcmalloc.h
@@ -0,0 +1,9 @@
+#pragma once
+
+#include <util/generic/fwd.h>
+
+namespace NProfiling {
+
+ui32 SetTCMallocThreadAllocTag(ui32 tag);
+
+}
diff --git a/library/cpp/actors/prof/tcmalloc_null.cpp b/library/cpp/actors/prof/tcmalloc_null.cpp
new file mode 100644
index 0000000000..75c0013154
--- /dev/null
+++ b/library/cpp/actors/prof/tcmalloc_null.cpp
@@ -0,0 +1,10 @@
+#include "tcmalloc.h"
+
+namespace NProfiling {
+
+ui32 SetTCMallocThreadAllocTag(ui32 tag) {
+ Y_UNUSED(tag);
+ return 0;
+}
+
+}
diff --git a/library/cpp/actors/prof/ut/tag_ut.cpp b/library/cpp/actors/prof/ut/tag_ut.cpp
new file mode 100644
index 0000000000..accf3921ab
--- /dev/null
+++ b/library/cpp/actors/prof/ut/tag_ut.cpp
@@ -0,0 +1,68 @@
+#include "tag.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+using namespace NProfiling;
+
+class TAtomTagsTest: public TTestBase {
+private:
+ UNIT_TEST_SUITE(TAtomTagsTest);
+ UNIT_TEST(Test_MakeTag);
+ UNIT_TEST(Test_Make2Tags);
+ UNIT_TEST(Test_MakeTagTwice);
+
+ UNIT_TEST(Test_MakeAndGetTag);
+
+ UNIT_TEST(Test_MakeVector);
+ UNIT_TEST_SUITE_END();
+
+public:
+ void Test_MakeTag();
+ void Test_Make2Tags();
+ void Test_MakeTagTwice();
+ void Test_MakeAndGetTag();
+ void Test_MakeVector();
+};
+
+UNIT_TEST_SUITE_REGISTRATION(TAtomTagsTest);
+
+void TAtomTagsTest::Test_MakeTag() {
+ ui32 tag = MakeTag("a tag");
+ UNIT_ASSERT(tag != 0);
+}
+
+void TAtomTagsTest::Test_Make2Tags() {
+ ui32 tag1 = MakeTag("a tag 1");
+ ui32 tag2 = MakeTag("a tag 2");
+ UNIT_ASSERT(tag1 != 0);
+ UNIT_ASSERT(tag2 != 0);
+ UNIT_ASSERT(tag1 != tag2);
+}
+
+void TAtomTagsTest::Test_MakeTagTwice() {
+ ui32 tag1 = MakeTag("a tag twice");
+ ui32 tag2 = MakeTag("a tag twice");
+ UNIT_ASSERT(tag1 != 0);
+ UNIT_ASSERT(tag1 == tag2);
+}
+
+void TAtomTagsTest::Test_MakeAndGetTag() {
+ const char* makeStr = "tag to get";
+ ui32 tag = MakeTag(makeStr);
+ const char* tagStr = GetTag(tag);
+ UNIT_ASSERT_STRINGS_EQUAL(makeStr, tagStr);
+}
+
+void TAtomTagsTest::Test_MakeVector() {
+ TVector<const char*> strs = {
+ "vector tag 0",
+ "vector tag 1",
+ "vector tag 3",
+ "vector tag 4"};
+ ui32 baseTag = MakeTags(strs);
+ UNIT_ASSERT(baseTag != 0);
+ for (ui32 i = 0; i < strs.size(); ++i) {
+ const char* str = GetTag(baseTag + i);
+ UNIT_ASSERT_STRINGS_EQUAL(str, strs[i]);
+ }
+}
diff --git a/library/cpp/actors/prof/ut/ya.make b/library/cpp/actors/prof/ut/ya.make
new file mode 100644
index 0000000000..47c58a8fb7
--- /dev/null
+++ b/library/cpp/actors/prof/ut/ya.make
@@ -0,0 +1,12 @@
+UNITTEST_FOR(library/cpp/actors/prof)
+
+OWNER(
+ agri
+ g:kikimr
+)
+
+SRCS(
+ tag_ut.cpp
+)
+
+END()
diff --git a/library/cpp/actors/prof/ya.make b/library/cpp/actors/prof/ya.make
new file mode 100644
index 0000000000..b5e2497563
--- /dev/null
+++ b/library/cpp/actors/prof/ya.make
@@ -0,0 +1,33 @@
+LIBRARY()
+
+OWNER(
+ agri
+ g:kikimr
+)
+
+SRCS(
+ tag.cpp
+)
+
+PEERDIR(
+ library/cpp/charset
+ library/cpp/containers/atomizer
+)
+
+IF (PROFILE_MEMORY_ALLOCATIONS)
+ CFLAGS(-DPROFILE_MEMORY_ALLOCATIONS)
+ PEERDIR(
+ library/cpp/malloc/api
+ library/cpp/lfalloc/dbg_info
+ library/cpp/ytalloc/api
+ )
+ENDIF()
+
+IF(ALLOCATOR == "TCMALLOC_256K")
+ SRCS(tcmalloc.cpp)
+ PEERDIR(contrib/libs/tcmalloc)
+ELSE()
+ SRCS(tcmalloc_null.cpp)
+ENDIF()
+
+END()
diff --git a/library/cpp/actors/protos/actors.proto b/library/cpp/actors/protos/actors.proto
new file mode 100644
index 0000000000..5fbd6d44ee
--- /dev/null
+++ b/library/cpp/actors/protos/actors.proto
@@ -0,0 +1,13 @@
+package NActorsProto;
+option java_package = "ru.yandex.kikimr.proto";
+option java_outer_classname = "NActorsBaseProto";
+
+message TActorId {
+ required fixed64 RawX1 = 1;
+ required fixed64 RawX2 = 2;
+}
+
+message TCallbackException {
+ required TActorId ActorId = 1;
+ required string ExceptionMessage = 2;
+}
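
TActorId travels as its raw 128-bit pair, so a round trip through this message is just (sketch, assuming the RawX1()/RawX2() accessors and the two-ui64 constructor of NActors::TActorId):

    NActorsProto::TActorId proto;
    proto.SetRawX1(selfId.RawX1());
    proto.SetRawX2(selfId.RawX2());
    const TActorId restored(proto.GetRawX1(), proto.GetRawX2());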
diff --git a/library/cpp/actors/protos/interconnect.proto b/library/cpp/actors/protos/interconnect.proto
new file mode 100644
index 0000000000..2e3b0d0d15
--- /dev/null
+++ b/library/cpp/actors/protos/interconnect.proto
@@ -0,0 +1,113 @@
+import "library/cpp/actors/protos/actors.proto";
+import "google/protobuf/descriptor.proto";
+
+package NActorsInterconnect;
+option java_package = "ru.yandex.kikimr.proto";
+
+message TEvResolveNode {
+ optional uint32 NodeId = 1;
+ optional uint64 Deadline = 2;
+}
+
+message TEvNodeInfo {
+ optional uint32 NodeId = 1;
+ optional string Address = 2;
+ optional uint32 Port = 3;
+}
+
+extend google.protobuf.FieldOptions {
+ optional string PrintName = 50376;
+}
+
+message TNodeLocation {
+ // compatibility section -- will be removed in future versions
+ optional uint32 DataCenterNum = 1 [deprecated=true];
+ optional uint32 RoomNum = 2 [deprecated=true];
+ optional uint32 RackNum = 3 [deprecated=true];
+ optional uint32 BodyNum = 4 [deprecated=true];
+ optional uint32 Body = 100500 [deprecated=true]; // for compatibility with WalleLocation
+
+ optional string DataCenter = 10 [(PrintName) = "DC"];
+ optional string Module = 20 [(PrintName) = "M"];
+ optional string Rack = 30 [(PrintName) = "R"];
+ optional string Unit = 40 [(PrintName) = "U"];
+}
+
+message TClusterUUIDs {
+ optional string ClusterUUID = 1;
+ repeated string AcceptUUID = 2;
+}
+
+message TScopeId {
+ optional fixed64 X1 = 1;
+ optional fixed64 X2 = 2;
+}
+
+message THandshakeRequest {
+ required uint64 Protocol = 1;
+
+ required uint64 ProgramPID = 2;
+ required uint64 ProgramStartTime = 3;
+ required uint64 Serial = 4;
+
+ required uint32 ReceiverNodeId = 5;
+ required string SenderActorId = 6;
+
+ optional string SenderHostName = 7;
+ optional string ReceiverHostName = 8;
+ optional string UUID = 9;
+ optional TClusterUUIDs ClusterUUIDs = 13;
+
+ optional bytes Ballast = 10;
+
+ optional string VersionTag = 11;
+ repeated string AcceptedVersionTags = 12;
+
+ optional bool RequireEncryption = 14;
+ optional TScopeId ClientScopeId = 15;
+
+ optional string Cookie = 16;
+ optional bool DoCheckCookie = 17;
+
+ optional bool RequestModernFrame = 18;
+
+ optional bool RequestAuthOnly = 19;
+}
+
+message THandshakeSuccess {
+ required uint64 Protocol = 1;
+
+ required uint64 ProgramPID = 2;
+ required uint64 ProgramStartTime = 3;
+ required uint64 Serial = 4;
+
+ required string SenderActorId = 5;
+
+ optional string VersionTag = 6;
+ repeated string AcceptedVersionTags = 7;
+
+ optional TClusterUUIDs ClusterUUIDs = 8;
+
+ optional bool StartEncryption = 9;
+ optional TScopeId ServerScopeId = 10;
+
+ optional bool UseModernFrame = 11;
+
+ optional bool AuthOnly = 12;
+}
+
+message THandshakeReply {
+ optional THandshakeSuccess Success = 1;
+ optional string ErrorExplaination = 2;
+ optional bool CookieCheckResult = 3;
+}
+
+message TEvLoadMessage {
+ message THop {
+ optional NActorsProto.TActorId NextHop = 1; // if zero, then the payload is trimmed out of the message
+ }
+
+ repeated THop Hops = 1; // the route for the message
+ optional string Id = 3; // message identifier
+ optional bytes Payload = 4; // data payload
+}
diff --git a/library/cpp/actors/protos/services_common.proto b/library/cpp/actors/protos/services_common.proto
new file mode 100644
index 0000000000..afa0ec0073
--- /dev/null
+++ b/library/cpp/actors/protos/services_common.proto
@@ -0,0 +1,21 @@
+package NActorsServices;
+option java_package = "ru.yandex.kikimr.proto";
+
+// 0-255 range
+enum EServiceCommon {
+ // WARN: This must be the smallest value in the enumeration
+
+ GLOBAL = 0;
+ INTERCONNECT = 1;
+ TEST = 2;
+ PROTOCOLS = 3;
+ INTERCONNECT_SPEED_TEST = 4;
+ INTERCONNECT_STATUS = 5;
+ INTERCONNECT_NETWORK = 6;
+ INTERCONNECT_SESSION = 7;
+ HTTP = 8;
+
+    // This value is a reserved boundary. It must not be aliased with any value.
+    // TODO: use reserved values upon protobuf update
+ // COMMON_END = 256;
+};
diff --git a/library/cpp/actors/protos/unittests.proto b/library/cpp/actors/protos/unittests.proto
new file mode 100644
index 0000000000..a856b0942a
--- /dev/null
+++ b/library/cpp/actors/protos/unittests.proto
@@ -0,0 +1,20 @@
+option cc_enable_arenas = true;
+
+message TSimple {
+ required string Str1 = 1;
+ optional string Str2 = 2;
+ optional uint64 Number1 = 3;
+}
+
+message TBigMessage {
+ repeated TSimple Simples = 1;
+ repeated string ManyStr = 2;
+ optional string OneMoreStr = 3;
+ optional uint64 YANumber = 4;
+}
+
+message TMessageWithPayload {
+ optional string Meta = 1;
+ repeated uint32 PayloadId = 2;
+ repeated string SomeData = 3;
+}
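
TMessageWithPayload backs the rope-payload tests (event_pb_payload_ut.cpp): bulk data stays out of the serialized protobuf and only payload indexes are carried in PayloadId. A sketch, assuming a TEventPB wrapper (hypothetical TEvMessageWithPayload) and TEventPB's AddPayload helper:

    auto ev = MakeHolder<TEvMessageWithPayload>(); // hypothetical TEventPB over TMessageWithPayload
    ev->Record.SetMeta("meta");
    const ui32 id = ev->AddPayload(TRope(TString(1024, 'x'))); // bulk bytes travel out-of-band
    ev->Record.AddPayloadId(id);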
diff --git a/library/cpp/actors/protos/ya.make b/library/cpp/actors/protos/ya.make
new file mode 100644
index 0000000000..3a1488d78e
--- /dev/null
+++ b/library/cpp/actors/protos/ya.make
@@ -0,0 +1,14 @@
+PROTO_LIBRARY()
+
+OWNER(g:kikimr)
+
+SRCS(
+ actors.proto
+ interconnect.proto
+ services_common.proto
+ unittests.proto
+)
+
+EXCLUDE_TAGS(GO_PROTO)
+
+END()
diff --git a/library/cpp/actors/testlib/decorator_ut.cpp b/library/cpp/actors/testlib/decorator_ut.cpp
new file mode 100644
index 0000000000..e9a2fa3560
--- /dev/null
+++ b/library/cpp/actors/testlib/decorator_ut.cpp
@@ -0,0 +1,327 @@
+#include "test_runtime.h"
+
+#include <library/cpp/actors/core/actor_bootstrapped.h>
+#include <library/cpp/testing/unittest/registar.h>
+
+
+using namespace NActors;
+
+
+Y_UNIT_TEST_SUITE(TesTTestDecorator) {
+
+ bool IsVerbose = false;
+ void Write(TString msg) {
+ if (IsVerbose) {
+ Cerr << (TStringBuilder() << msg << Endl);
+ }
+ }
+
+ struct TDyingChecker : TTestDecorator {
+ TActorId MasterId;
+
+ TDyingChecker(THolder<IActor> &&actor, TActorId masterId)
+ : TTestDecorator(std::move(actor))
+ , MasterId(masterId)
+ {
+ Write("TDyingChecker::Construct\n");
+ }
+
+ virtual ~TDyingChecker() {
+ Write("TDyingChecker::~TDyingChecker");
+ TActivationContext::Send(new IEventHandle(MasterId, SelfId(), new TEvents::TEvPing()));
+ }
+
+ bool DoBeforeReceiving(TAutoPtr<IEventHandle> &/*ev*/, const TActorContext &/*ctx*/) override {
+ Write("TDyingChecker::DoBeforeReceiving");
+ return true;
+ }
+
+ void DoAfterReceiving(const TActorContext &/*ctx*/) override {
+ Write("TDyingChecker::DoAfterReceiving");
+ }
+ };
+
+ struct TTestMasterActor : TActorBootstrapped<TTestMasterActor> {
+ friend TActorBootstrapped<TTestMasterActor>;
+
+ TSet<TActorId> ActorIds;
+ TVector<THolder<IActor>> Actors;
+ TActorId EdgeActor;
+
+ TTestMasterActor(TVector<THolder<IActor>> &&actors, TActorId edgeActor)
+ : TActorBootstrapped()
+ , Actors(std::move(actors))
+ , EdgeActor(edgeActor)
+ {
+ }
+
+ void Bootstrap()
+ {
+ Write("Start master actor");
+ for (auto &actor : Actors) {
+                THolder<IActor> decoratedActor = MakeHolder<TDyingChecker>(std::move(actor), SelfId());
+                TActorId id = Register(decoratedActor.Release());
+ Write("Register test actor");
+ UNIT_ASSERT(ActorIds.insert(id).second);
+ }
+ Become(&TTestMasterActor::State);
+ }
+
+ STATEFN(State) {
+ auto it = ActorIds.find(ev->Sender);
+ UNIT_ASSERT(it != ActorIds.end());
+ Write("End test actor");
+ ActorIds.erase(it);
+ if (!ActorIds) {
+ Send(EdgeActor, new TEvents::TEvPing());
+ PassAway();
+ }
+ }
+ };
+
+ enum {
+ Begin = EventSpaceBegin(TEvents::ES_USERSPACE),
+ EvWords
+ };
+
+ struct TEvWords : TEventLocal<TEvWords, EvWords> {
+ TVector<TString> Words;
+
+ TEvWords()
+ : TEventLocal()
+ {
+ }
+ };
+
+ struct TFizzBuzzToFooBar : TTestDecorator {
+ TFizzBuzzToFooBar(THolder<IActor> &&actor)
+ : TTestDecorator(std::move(actor))
+ {
+ }
+
+ bool DoBeforeSending(TAutoPtr<IEventHandle> &ev) override {
+ if (ev->Type == TEvents::TSystem::Bootstrap) {
+ return true;
+ }
+ Write("TFizzBuzzToFooBar::DoBeforeSending");
+ TEventHandle<TEvWords> *handle = reinterpret_cast<TEventHandle<TEvWords>*>(ev.Get());
+ UNIT_ASSERT(handle);
+ TEvWords *event = handle->Get();
+ TVector<TString> &words = event->Words;
+ TStringBuilder wordsMsg;
+ for (auto &word : words) {
+ wordsMsg << word << ';';
+ }
+ Write(TStringBuilder() << "Send# " << wordsMsg);
+ if (words.size() == 2 && words[0] == "Fizz" && words[1] == "Buzz") {
+ words[0] = "Foo";
+ words[1] = "Bar";
+ }
+ return true;
+ }
+
+ bool DoBeforeReceiving(TAutoPtr<IEventHandle> &/*ev*/, const TActorContext &/*ctx*/) override {
+ Write("TFizzBuzzToFooBar::DoBeforeReceiving");
+ return true;
+ }
+
+ void DoAfterReceiving(const TActorContext &/*ctx*/) override {
+ Write("TFizzBuzzToFooBar::DoAfterReceiving");
+ }
+ };
+
+ struct TWordEraser : TTestDecorator {
+ TString ErasingWord;
+
+ TWordEraser(THolder<IActor> &&actor, TString word)
+ : TTestDecorator(std::move(actor))
+ , ErasingWord(word)
+ {
+ }
+
+ bool DoBeforeSending(TAutoPtr<IEventHandle> &ev) override {
+ if (ev->Type == TEvents::TSystem::Bootstrap) {
+ return true;
+ }
+ Write("TWordEraser::DoBeforeSending");
+ TEventHandle<TEvWords> *handle = reinterpret_cast<TEventHandle<TEvWords>*>(ev.Get());
+ UNIT_ASSERT(handle);
+ TEvWords *event = handle->Get();
+ TVector<TString> &words = event->Words;
+ auto it = Find(words.begin(), words.end(), ErasingWord);
+ if (it != words.end()) {
+ words.erase(it);
+ }
+ return true;
+ }
+
+ bool DoBeforeReceiving(TAutoPtr<IEventHandle> &/*ev*/, const TActorContext &/*ctx*/) override {
+ Write("TWordEraser::DoBeforeReceiving");
+ return true;
+ }
+
+ void DoAfterReceiving(const TActorContext &/*ctx*/) override {
+ Write("TWordEraser::DoAfterReceiving");
+ }
+ };
+
+    struct TWithoutWordsDropper : TTestDecorator {
+        TWithoutWordsDropper(THolder<IActor> &&actor)
+ : TTestDecorator(std::move(actor))
+ {
+ }
+
+ bool DoBeforeSending(TAutoPtr<IEventHandle> &ev) override {
+ if (ev->Type == TEvents::TSystem::Bootstrap) {
+ return true;
+ }
+ Write("TWithoutWordsDroper::DoBeforeSending");
+ TEventHandle<TEvWords> *handle = reinterpret_cast<TEventHandle<TEvWords>*>(ev.Get());
+ UNIT_ASSERT(handle);
+ TEvWords *event = handle->Get();
+ return bool(event->Words);
+ }
+
+ bool DoBeforeReceiving(TAutoPtr<IEventHandle> &/*ev*/, const TActorContext &/*ctx*/) override {
+ Write("TWithoutWordsDroper::DoBeforeReceiving");
+ return true;
+ }
+
+ void DoAfterReceiving(const TActorContext &/*ctx*/) override {
+ Write("TWithoutWordsDroper::DoAfterReceiving");
+ }
+ };
+
+ struct TFooBarReceiver : TActorBootstrapped<TFooBarReceiver> {
+ TActorId MasterId;
+ ui64 Counter = 0;
+
+ TFooBarReceiver(TActorId masterId)
+ : TActorBootstrapped()
+ , MasterId(masterId)
+ {
+ }
+
+ void Bootstrap()
+ {
+ Become(&TFooBarReceiver::State);
+ }
+
+ STATEFN(State) {
+ TEventHandle<TEvWords> *handle = reinterpret_cast<TEventHandle<TEvWords>*>(ev.Get());
+ UNIT_ASSERT(handle);
+ UNIT_ASSERT(handle->Sender == MasterId);
+ TEvWords *event = handle->Get();
+ TVector<TString> &words = event->Words;
+ UNIT_ASSERT(words.size() == 2 && words[0] == "Foo" && words[1] == "Bar");
+ Write(TStringBuilder() << "Receive# " << Counter + 1 << '/' << 2);
+ if (++Counter == 2) {
+ PassAway();
+ }
+ }
+ };
+
+ struct TFizzBuzzSender : TActorBootstrapped<TFizzBuzzSender> {
+ TActorId SlaveId;
+
+ TFizzBuzzSender()
+ : TActorBootstrapped()
+ {
+ Write("TFizzBuzzSender::Construct");
+ }
+
+ void Bootstrap() {
+ Write("TFizzBuzzSender::Bootstrap");
+ THolder<IActor> actor = MakeHolder<TFooBarReceiver>(SelfId());
+ THolder<IActor> decoratedActor = MakeHolder<TDyingChecker>(std::move(actor), SelfId());
+ SlaveId = Register(decoratedActor.Release());
+ for (ui64 idx = 1; idx <= 30; ++idx) {
+ THolder<TEvWords> ev = MakeHolder<TEvWords>();
+ if (idx % 3 == 0) {
+ ev->Words.push_back("Fizz");
+ }
+ if (idx % 5 == 0) {
+ ev->Words.push_back("Buzz");
+ }
+ Send(SlaveId, ev.Release());
+ Write("TFizzBuzzSender::Send words");
+ }
+ Become(&TFizzBuzzSender::State);
+ }
+
+ STATEFN(State) {
+ UNIT_ASSERT(ev->Sender == SlaveId);
+ PassAway();
+ }
+ };
+
+ struct TCounters {
+        ui64 SentCount = 0;
+        ui64 ReceivedCount = 0;
+ };
+
+ struct TCountingDecorator : TTestDecorator {
+ TCounters *Counters;
+
+ TCountingDecorator(THolder<IActor> &&actor, TCounters *counters)
+ : TTestDecorator(std::move(actor))
+ , Counters(counters)
+ {
+ }
+
+ bool DoBeforeSending(TAutoPtr<IEventHandle> &ev) override {
+ if (ev->Type == TEvents::TSystem::Bootstrap) {
+ return true;
+ }
+ Write("TCountingDecorator::DoBeforeSending");
+ Counters->SentCount++;
+ return true;
+ }
+
+ bool DoBeforeReceiving(TAutoPtr<IEventHandle> &/*ev*/, const TActorContext &/*ctx*/) override {
+ Write("TCountingDecorator::DoBeforeReceiving");
+ Counters->ReceivedCount++;
+ return true;
+ }
+ };
+
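+ // Scheduled-event filter installed by the Basic test below. Returning false lets the
+ // runtime enqueue the event with the computed deadline; returning true drops it.
+ // Here scheduling is kept only for actors enabled via EnableScheduleForActor.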
+ bool ScheduledFilterFunc(NActors::TTestActorRuntimeBase& runtime, TAutoPtr<NActors::IEventHandle>& event,
+ TDuration delay, TInstant& deadline) {
+ if (runtime.IsScheduleForActorEnabled(event->GetRecipientRewrite())) {
+ deadline = runtime.GetTimeProvider()->Now() + delay;
+ return false;
+ }
+ return true;
+ }
+
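+ // Builds the decorator chain
+ // TWithoutWordsDropper(TWordEraser("Buzz", TWordEraser("Fizz", TFizzBuzzToFooBar(sender)))),
+ // where each layer may rewrite or swallow outgoing events via DoBeforeSending.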
+ THolder<IActor> CreateFizzBuzzSender() {
+ THolder<IActor> actor = MakeHolder<TFizzBuzzSender>();
+ THolder<IActor> foobar = MakeHolder<TFizzBuzzToFooBar>(std::move(actor));
+ THolder<IActor> fizzEraser = MakeHolder<TWordEraser>(std::move(foobar), "Fizz");
+ THolder<IActor> buzzEraser = MakeHolder<TWordEraser>(std::move(fizzEraser), "Buzz");
+ return MakeHolder<TWithoutWordsDropper>(std::move(buzzEraser));
+ }
+
+ Y_UNIT_TEST(Basic) {
+ TTestActorRuntimeBase runtime(1, false);
+
+ runtime.SetScheduledEventFilter(&ScheduledFilterFunc);
+ runtime.SetEventFilter([](NActors::TTestActorRuntimeBase&, TAutoPtr<NActors::IEventHandle>&) {
+ return false;
+ });
+ runtime.Initialize();
+
+ TActorId edgeActor = runtime.AllocateEdgeActor();
+ TVector<THolder<IActor>> actors(1);
+ actors[0] = CreateFizzBuzzSender();
+ //actors[1] = CreateFizzBuzzSender();
+ THolder<IActor> testActor = MakeHolder<TTestMasterActor>(std::move(actors), edgeActor);
+ Write("Start test");
+ runtime.Register(testActor.Release());
+
+ TAutoPtr<IEventHandle> handle;
+ auto ev = runtime.GrabEdgeEventRethrow<TEvents::TEvPing>(handle);
+ UNIT_ASSERT(ev);
+ Write("Stop test");
+ }
+}
diff --git a/library/cpp/actors/testlib/test_runtime.cpp b/library/cpp/actors/testlib/test_runtime.cpp
new file mode 100644
index 0000000000..6fa25b9965
--- /dev/null
+++ b/library/cpp/actors/testlib/test_runtime.cpp
@@ -0,0 +1,1902 @@
+#include "test_runtime.h"
+
+#include <library/cpp/actors/core/actor_bootstrapped.h>
+#include <library/cpp/actors/core/callstack.h>
+#include <library/cpp/actors/core/executor_pool_basic.h>
+#include <library/cpp/actors/core/executor_pool_io.h>
+#include <library/cpp/actors/core/log.h>
+#include <library/cpp/actors/core/scheduler_basic.h>
+#include <library/cpp/actors/util/datetime.h>
+#include <library/cpp/actors/protos/services_common.pb.h>
+#include <library/cpp/random_provider/random_provider.h>
+#include <library/cpp/actors/interconnect/interconnect.h>
+#include <library/cpp/actors/interconnect/interconnect_tcp_proxy.h>
+#include <library/cpp/actors/interconnect/interconnect_proxy_wrapper.h>
+
+#include <util/generic/maybe.h>
+#include <util/generic/bt_exception.h>
+#include <util/random/mersenne.h>
+#include <util/string/printf.h>
+#include <typeinfo>
+
+bool VERBOSE = false;
+const bool PRINT_EVENT_BODY = false;
+
+namespace {
+
+ TString MakeClusterId() {
+ pid_t pid = getpid();
+ TStringBuilder uuid;
+ uuid << "Cluster for process with id: " << pid;
+ return uuid;
+ }
+}
+
+namespace NActors {
+ ui64 TScheduledEventQueueItem::NextUniqueId = 0;
+
+ void PrintEvent(TAutoPtr<IEventHandle>& ev, const TTestActorRuntimeBase* runtime) {
+ Cerr << "mailbox: " << ev->GetRecipientRewrite().Hint() << ", type: " << Sprintf("%08x", ev->GetTypeRewrite())
+ << ", from " << ev->Sender.LocalId();
+ TString name = runtime->GetActorName(ev->Sender);
+ if (!name.empty())
+ Cerr << " \"" << name << "\"";
+ Cerr << ", to " << ev->GetRecipientRewrite().LocalId();
+ name = runtime->GetActorName(ev->GetRecipientRewrite());
+ if (!name.empty())
+ Cerr << " \"" << name << "\"";
+ Cerr << ", ";
+ if (ev->HasEvent())
+ Cerr << " : " << (PRINT_EVENT_BODY ? ev->GetBase()->ToString() : ev->GetBase()->ToStringHeader());
+ else if (ev->HasBuffer())
+ Cerr << " : BUFFER";
+ else
+ Cerr << " : EMPTY";
+
+ Cerr << "\n";
+ }
+
+ TTestActorRuntimeBase::TNodeDataBase::TNodeDataBase() {
+ ActorSystemTimestamp = nullptr;
+ ActorSystemMonotonic = nullptr;
+ }
+
+ void TTestActorRuntimeBase::TNodeDataBase::Stop() {
+ if (Poller)
+ Poller->Stop();
+
+ if (MailboxTable) {
+ for (ui32 round = 0; !MailboxTable->Cleanup(); ++round)
+ Y_VERIFY(round < 10, "cyclic event/actor spawn while trying to shutdown actorsystem stub");
+ }
+
+ if (ActorSystem)
+ ActorSystem->Stop();
+
+ ActorSystem.Destroy();
+ Poller.Reset();
+ }
+
+ TTestActorRuntimeBase::TNodeDataBase::~TNodeDataBase() {
+ Stop();
+ }
+
+
+ class TTestActorRuntimeBase::TEdgeActor : public TActor<TEdgeActor> {
+ public:
+ static constexpr EActivityType ActorActivityType() {
+ return TEST_ACTOR_RUNTIME;
+ }
+
+ TEdgeActor(TTestActorRuntimeBase* runtime)
+ : TActor(&TEdgeActor::StateFunc)
+ , Runtime(runtime)
+ {
+ }
+
+ STFUNC(StateFunc) {
+ Y_UNUSED(ctx);
+ TGuard<TMutex> guard(Runtime->Mutex);
+ bool verbose = (Runtime->CurrentDispatchContext ? !Runtime->CurrentDispatchContext->Options->Quiet : true) && VERBOSE;
+ if (Runtime->BlockedOutput.find(ev->Sender) != Runtime->BlockedOutput.end()) {
+ verbose = false;
+ }
+
+ if (verbose) {
+ Cerr << "Got event at " << TInstant::MicroSeconds(Runtime->CurrentTimestamp) << ", ";
+ PrintEvent(ev, Runtime);
+ }
+
+ if (!Runtime->EventFilterFunc(*Runtime, ev)) {
+ ui32 nodeId = ev->GetRecipientRewrite().NodeId();
+ Y_VERIFY(nodeId != 0);
+ ui32 mailboxHint = ev->GetRecipientRewrite().Hint();
+ Runtime->GetMailbox(nodeId, mailboxHint).Send(ev);
+ Runtime->MailboxesHasEvents.Signal();
+ if (verbose)
+ Cerr << "Event was added to sent queue\n";
+ }
+ else {
+ if (verbose)
+ Cerr << "Event was dropped\n";
+ }
+ }
+
+ private:
+ TTestActorRuntimeBase* Runtime;
+ };
+
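+ // With DEBUG_ORDER_EVENTS defined, Send() tags every handle with a sequence number
+ // that Pop() verifies, asserting strict FIFO delivery within a mailbox.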
+ void TEventMailBox::Send(TAutoPtr<IEventHandle> ev) {
+ IEventHandle* ptr = ev.Get();
+ Y_VERIFY(ptr);
+#ifdef DEBUG_ORDER_EVENTS
+ ui64 counter = NextToSend++;
+ TrackSent[ptr] = counter;
+#endif
+ Sent.push_back(ev);
+ }
+
+ TAutoPtr<IEventHandle> TEventMailBox::Pop() {
+ TAutoPtr<IEventHandle> result = Sent.front();
+ Sent.pop_front();
+#ifdef DEBUG_ORDER_EVENTS
+ auto it = TrackSent.find(result.Get());
+ if (it != TrackSent.end()) {
+ Y_VERIFY(ExpectedReceive == it->second);
+ TrackSent.erase(result.Get());
+ ++ExpectedReceive;
+ }
+#endif
+ return result;
+ }
+
+ bool TEventMailBox::IsEmpty() const {
+ return Sent.empty();
+ }
+
+ void TEventMailBox::Capture(TEventsList& evList) {
+ evList.insert(evList.end(), Sent.begin(), Sent.end());
+ Sent.clear();
+ }
+
+ void TEventMailBox::PushFront(TAutoPtr<IEventHandle>& ev) {
+ Sent.push_front(ev);
+ }
+
+ void TEventMailBox::PushFront(TEventsList& evList) {
+ for (auto rit = evList.rbegin(); rit != evList.rend(); ++rit) {
+ if (*rit) {
+ Sent.push_front(*rit);
+ }
+ }
+ }
+
+ void TEventMailBox::CaptureScheduled(TScheduledEventsList& evList) {
+ for (auto it = Scheduled.begin(); it != Scheduled.end(); ++it) {
+ evList.insert(*it);
+ }
+
+ Scheduled.clear();
+ }
+
+ void TEventMailBox::PushScheduled(TScheduledEventsList& evList) {
+ for (auto it = evList.begin(); it != evList.end(); ++it) {
+ if (it->Event) {
+ Scheduled.insert(*it);
+ }
+ }
+
+ evList.clear();
+ }
+
+ bool TEventMailBox::IsActive(const TInstant& currentTime) const {
+ return currentTime >= InactiveUntil;
+ }
+
+ void TEventMailBox::Freeze(const TInstant& deadline) {
+ if (deadline > InactiveUntil)
+ InactiveUntil = deadline;
+ }
+
+ TInstant TEventMailBox::GetInactiveUntil() const {
+ return InactiveUntil;
+ }
+
+ void TEventMailBox::Schedule(const TScheduledEventQueueItem& item) {
+ Scheduled.insert(item);
+ }
+
+ bool TEventMailBox::IsScheduledEmpty() const {
+ return Scheduled.empty();
+ }
+
+ TInstant TEventMailBox::GetFirstScheduleDeadline() const {
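+ // Precondition: callers must check IsScheduledEmpty() first; dereferencing
+ // Scheduled.begin() on an empty set is undefined behavior.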
+ return Scheduled.begin()->Deadline;
+ }
+
+ ui64 TEventMailBox::GetSentEventCount() const {
+ return Sent.size();
+ }
+
+ class TTestActorRuntimeBase::TTimeProvider : public ITimeProvider {
+ public:
+ TTimeProvider(TTestActorRuntimeBase& runtime)
+ : Runtime(runtime)
+ {
+ }
+
+ TInstant Now() override {
+ return Runtime.GetCurrentTime();
+ }
+
+ private:
+ TTestActorRuntimeBase& Runtime;
+ };
+
+ class TTestActorRuntimeBase::TSchedulerThreadStub : public ISchedulerThread {
+ public:
+ TSchedulerThreadStub(TTestActorRuntimeBase* runtime, TTestActorRuntimeBase::TNodeDataBase* node)
+ : Runtime(runtime)
+ , Node(node)
+ {
+ Y_UNUSED(Runtime);
+ }
+
+ void Prepare(TActorSystem *actorSystem, volatile ui64 *currentTimestamp, volatile ui64 *currentMonotonic) override {
+ Y_UNUSED(actorSystem);
+ Node->ActorSystemTimestamp = currentTimestamp;
+ Node->ActorSystemMonotonic = currentMonotonic;
+ }
+
+ void PrepareSchedules(NSchedulerQueue::TReader **readers, ui32 scheduleReadersCount) override {
+ Y_UNUSED(readers);
+ Y_UNUSED(scheduleReadersCount);
+ }
+
+ void Start() override {
+ }
+
+ void PrepareStop() override {
+ }
+
+ void Stop() override {
+ }
+
+ private:
+ TTestActorRuntimeBase* Runtime;
+ TTestActorRuntimeBase::TNodeDataBase* Node;
+ };
+
+ class TTestActorRuntimeBase::TExecutorPoolStub : public IExecutorPool {
+ public:
+ TExecutorPoolStub(TTestActorRuntimeBase* runtime, ui32 nodeIndex, TTestActorRuntimeBase::TNodeDataBase* node, ui32 poolId)
+ : IExecutorPool(poolId)
+ , Runtime(runtime)
+ , NodeIndex(nodeIndex)
+ , Node(node)
+ {
+ }
+
+ TTestActorRuntimeBase* GetRuntime() {
+ return Runtime;
+ }
+
+ // for threads
+ ui32 GetReadyActivation(TWorkerContext& wctx, ui64 revolvingCounter) override {
+ Y_UNUSED(wctx);
+ Y_UNUSED(revolvingCounter);
+ Y_FAIL();
+ }
+
+ void ReclaimMailbox(TMailboxType::EType mailboxType, ui32 hint, TWorkerId workerId, ui64 revolvingCounter) override {
+ Y_UNUSED(workerId);
+ Node->MailboxTable->ReclaimMailbox(mailboxType, hint, revolvingCounter);
+ }
+
+ void Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie *cookie, TWorkerId workerId) override {
+ DoSchedule(deadline, ev, cookie, workerId);
+ }
+
+ void Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie *cookie, TWorkerId workerId) override {
+ DoSchedule(TInstant::FromValue(deadline.GetValue()), ev, cookie, workerId);
+ }
+
+ void Schedule(TDuration delay, TAutoPtr<IEventHandle> ev, ISchedulerCookie *cookie, TWorkerId workerId) override {
+ TInstant deadline = Runtime->GetTimeProvider()->Now() + delay;
+ DoSchedule(deadline, ev, cookie, workerId);
+ }
+
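+ // Common path for all Schedule() overloads; the monotonic overload above is mapped
+ // onto TInstant, and deadlines in the past are clamped to "now" below.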
+ void DoSchedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie *cookie, TWorkerId workerId) {
+ Y_UNUSED(workerId);
+
+ TGuard<TMutex> guard(Runtime->Mutex);
+ bool verbose = (Runtime->CurrentDispatchContext ? !Runtime->CurrentDispatchContext->Options->Quiet : true) && VERBOSE;
+ if (Runtime->BlockedOutput.find(ev->Sender) != Runtime->BlockedOutput.end()) {
+ verbose = false;
+ }
+
+ if (verbose) {
+ Cerr << "Got scheduled event at " << TInstant::MicroSeconds(Runtime->CurrentTimestamp) << ", ";
+ PrintEvent(ev, Runtime);
+ }
+
+ auto now = Runtime->GetTimeProvider()->Now();
+ if (deadline < now) {
+ deadline = now; // avoid going backwards in time
+ }
+ TDuration delay = (deadline - now);
+
+ if (Runtime->SingleSysEnv || !Runtime->ScheduledEventFilterFunc(*Runtime, ev, delay, deadline)) {
+ ui32 mailboxHint = ev->GetRecipientRewrite().Hint();
+ Runtime->GetMailbox(Runtime->FirstNodeId + NodeIndex, mailboxHint).Schedule(TScheduledEventQueueItem(deadline, ev, cookie));
+ Runtime->MailboxesHasEvents.Signal();
+ if (verbose)
+ Cerr << "Event was added to scheduled queue\n";
+ } else {
+ if (cookie) {
+ cookie->Detach();
+ }
+ if (verbose) {
+ Cerr << "Scheduled event for " << ev->GetRecipientRewrite().ToString() << " was dropped\n";
+ }
+ }
+ }
+
+ // for actorsystem
+ bool Send(TAutoPtr<IEventHandle>& ev) override {
+ TGuard<TMutex> guard(Runtime->Mutex);
+ bool verbose = (Runtime->CurrentDispatchContext ? !Runtime->CurrentDispatchContext->Options->Quiet : true) && VERBOSE;
+ if (Runtime->BlockedOutput.find(ev->Sender) != Runtime->BlockedOutput.end()) {
+ verbose = false;
+ }
+
+ if (verbose) {
+ Cerr << "Got event at " << TInstant::MicroSeconds(Runtime->CurrentTimestamp) << ", ";
+ PrintEvent(ev, Runtime);
+ }
+
+ if (!Runtime->EventFilterFunc(*Runtime, ev)) {
+ ui32 nodeId = ev->GetRecipientRewrite().NodeId();
+ Y_VERIFY(nodeId != 0);
+ TNodeDataBase* node = Runtime->Nodes[nodeId].Get();
+
+ if (!AllowSendFrom(node, ev)) {
+ return true;
+ }
+
+ ui32 mailboxHint = ev->GetRecipientRewrite().Hint();
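+ // Log events addressed to the node's logger actor are delivered synchronously
+ // here instead of being queued, so log output does not wait for a dispatch cycle.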
+ if (ev->GetTypeRewrite() == ui32(NActors::NLog::EEv::Log)) {
+ const NActors::TActorId loggerActorId = NActors::TActorId(nodeId, "logger");
+ TActorId logger = node->ActorSystem->LookupLocalService(loggerActorId);
+ if (ev->GetRecipientRewrite() == logger) {
+ TMailboxHeader* mailbox = node->MailboxTable->Get(mailboxHint);
+ IActor* recipientActor = mailbox->FindActor(ev->GetRecipientRewrite().LocalId());
+ if (recipientActor) {
+ TActorContext ctx(*mailbox, *node->ExecutorThread, GetCycleCountFast(), ev->GetRecipientRewrite());
+ TActivationContext *prevTlsActivationContext = TlsActivationContext;
+ TlsActivationContext = &ctx;
+ recipientActor->Receive(ev, ctx);
+ TlsActivationContext = prevTlsActivationContext;
+ // we expect the logger to never die in tests
+ }
+ }
+ } else {
+ Runtime->GetMailbox(nodeId, mailboxHint).Send(ev);
+ Runtime->MailboxesHasEvents.Signal();
+ }
+ if (verbose)
+ Cerr << "Event was added to sent queue\n";
+ } else {
+ if (verbose)
+ Cerr << "Event was dropped\n";
+ }
+ return true;
+ }
+
+ void ScheduleActivation(ui32 activation) override {
+ Y_UNUSED(activation);
+ }
+
+ void ScheduleActivationEx(ui32 activation, ui64 revolvingCounter) override {
+ Y_UNUSED(activation);
+ Y_UNUSED(revolvingCounter);
+ }
+
+ TActorId Register(IActor *actor, TMailboxType::EType mailboxType, ui64 revolvingCounter,
+ const TActorId& parentId) override {
+ return Runtime->Register(actor, NodeIndex, PoolId, mailboxType, revolvingCounter, parentId);
+ }
+
+ TActorId Register(IActor *actor, TMailboxHeader *mailbox, ui32 hint, const TActorId& parentId) override {
+ return Runtime->Register(actor, NodeIndex, PoolId, mailbox, hint, parentId);
+ }
+
+ // lifecycle stuff
+ void Prepare(TActorSystem *actorSystem, NSchedulerQueue::TReader **scheduleReaders, ui32 *scheduleSz) override {
+ Y_UNUSED(actorSystem);
+ Y_UNUSED(scheduleReaders);
+ Y_UNUSED(scheduleSz);
+ }
+
+ void Start() override {
+ }
+
+ void PrepareStop() override {
+ }
+
+ void Shutdown() override {
+ }
+
+ bool Cleanup() override {
+ return true;
+ }
+
+ // generic
+ TAffinity* Affinity() const override {
+ Y_FAIL();
+ }
+
+ private:
+ TTestActorRuntimeBase* const Runtime;
+ const ui32 NodeIndex;
+ TTestActorRuntimeBase::TNodeDataBase* const Node;
+ };
+
+ IExecutorPool* TTestActorRuntimeBase::CreateExecutorPoolStub(TTestActorRuntimeBase* runtime, ui32 nodeIndex, TTestActorRuntimeBase::TNodeDataBase* node, ui32 poolId) {
+ return new TExecutorPoolStub{runtime, nodeIndex, node, poolId};
+ }
+
+
+ ui32 TTestActorRuntimeBase::NextNodeId = 1;
+
+ TTestActorRuntimeBase::TTestActorRuntimeBase(THeSingleSystemEnv)
+ : TTestActorRuntimeBase(1, 1, false)
+ {
+ SingleSysEnv = true;
+ }
+
+ TTestActorRuntimeBase::TTestActorRuntimeBase(ui32 nodeCount, ui32 dataCenterCount, bool useRealThreads)
+ : ScheduledCount(0)
+ , ScheduledLimit(100000)
+ , MainThreadId(TThread::CurrentThreadId())
+ , ClusterUUID(MakeClusterId())
+ , FirstNodeId(NextNodeId)
+ , NodeCount(nodeCount)
+ , DataCenterCount(dataCenterCount)
+ , UseRealThreads(useRealThreads)
+ , LocalId(0)
+ , DispatchCyclesCount(0)
+ , DispatchedEventsCount(0)
+ , NeedMonitoring(false)
+ , RandomProvider(CreateDeterministicRandomProvider(DefaultRandomSeed))
+ , TimeProvider(new TTimeProvider(*this))
+ , ShouldContinue()
+ , CurrentTimestamp(0)
+ , DispatchTimeout(DEFAULT_DISPATCH_TIMEOUT)
+ , ReschedulingDelay(TDuration::MicroSeconds(0))
+ , ObserverFunc(&TTestActorRuntimeBase::DefaultObserverFunc)
+ , ScheduledEventsSelectorFunc(&CollapsedTimeScheduledEventsSelector)
+ , EventFilterFunc(&TTestActorRuntimeBase::DefaultFilterFunc)
+ , ScheduledEventFilterFunc(&TTestActorRuntimeBase::NopFilterFunc)
+ , RegistrationObserver(&TTestActorRuntimeBase::DefaultRegistrationObserver)
+ , CurrentDispatchContext(nullptr)
+ {
+ SetDispatcherRandomSeed(TInstant::Now(), 0);
+ EnableActorCallstack();
+ }
+
+ void TTestActorRuntimeBase::InitNode(TNodeDataBase* node, size_t nodeIndex) {
+ const NActors::TActorId loggerActorId = NActors::TActorId(FirstNodeId + nodeIndex, "logger");
+ node->LogSettings = new NActors::NLog::TSettings(loggerActorId, 410 /* NKikimrServices::LOGGER */,
+ NActors::NLog::PRI_WARN, NActors::NLog::PRI_WARN, 0);
+ node->LogSettings->SetAllowDrop(false);
+ node->LogSettings->SetThrottleDelay(TDuration::Zero());
+ node->DynamicCounters = new NMonitoring::TDynamicCounters;
+
+ InitNodeImpl(node, nodeIndex);
+ }
+
+ void TTestActorRuntimeBase::InitNodeImpl(TNodeDataBase* node, size_t nodeIndex) {
+ node->LogSettings->Append(
+ NActorsServices::EServiceCommon_MIN,
+ NActorsServices::EServiceCommon_MAX,
+ NActorsServices::EServiceCommon_Name
+ );
+
+ if (!UseRealThreads) {
+ node->SchedulerPool.Reset(CreateExecutorPoolStub(this, nodeIndex, node, 0));
+ node->MailboxTable.Reset(new TMailboxTable());
+ node->ActorSystem = MakeActorSystem(nodeIndex, node);
+ node->ExecutorThread.Reset(new TExecutorThread(0, 0, node->ActorSystem.Get(), node->SchedulerPool.Get(), node->MailboxTable.Get(), "TestExecutor"));
+ } else {
+ node->ActorSystem = MakeActorSystem(nodeIndex, node);
+ }
+
+ node->ActorSystem->Start();
+ }
+
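+ // Hook point for TTestDecorator: if the sender is a decorator, it may rewrite or
+ // swallow the outgoing event (BeforeSending returning false) before it is enqueued.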
+ bool TTestActorRuntimeBase::AllowSendFrom(TNodeDataBase* node, TAutoPtr<IEventHandle>& ev) {
+ ui64 senderLocalId = ev->Sender.LocalId();
+ ui64 senderMailboxHint = ev->Sender.Hint();
+ TMailboxHeader* senderMailbox = node->MailboxTable->Get(senderMailboxHint);
+ if (senderMailbox) {
+ IActor* senderActor = senderMailbox->FindActor(senderLocalId);
+ TTestDecorator *decorator = dynamic_cast<TTestDecorator*>(senderActor);
+ return !decorator || decorator->BeforeSending(ev);
+ }
+ return true;
+ }
+
+ TTestActorRuntimeBase::TTestActorRuntimeBase(ui32 nodeCount, ui32 dataCenterCount)
+ : TTestActorRuntimeBase(nodeCount, dataCenterCount, false) {
+ }
+
+ TTestActorRuntimeBase::TTestActorRuntimeBase(ui32 nodeCount, bool useRealThreads)
+ : TTestActorRuntimeBase(nodeCount, nodeCount, useRealThreads) {
+ }
+
+ TTestActorRuntimeBase::~TTestActorRuntimeBase() {
+ CleanupNodes();
+ Cerr.Flush();
+ Clog.Flush();
+
+ DisableActorCallstack();
+ }
+
+ void TTestActorRuntimeBase::CleanupNodes() {
+ Nodes.clear();
+ }
+
+ bool TTestActorRuntimeBase::IsRealThreads() const {
+ return UseRealThreads;
+ }
+
+ TTestActorRuntimeBase::EEventAction TTestActorRuntimeBase::DefaultObserverFunc(TTestActorRuntimeBase& runtime, TAutoPtr<IEventHandle>& event) {
+ Y_UNUSED(runtime);
+ Y_UNUSED(event);
+ return EEventAction::PROCESS;
+ }
+
+ void TTestActorRuntimeBase::DroppingScheduledEventsSelector(TTestActorRuntimeBase& runtime, TScheduledEventsList& scheduledEvents, TEventsList& queue) {
+ Y_UNUSED(runtime);
+ Y_UNUSED(queue);
+ scheduledEvents.clear();
+ }
+
+ bool TTestActorRuntimeBase::DefaultFilterFunc(TTestActorRuntimeBase& runtime, TAutoPtr<IEventHandle>& event) {
+ Y_UNUSED(runtime);
+ Y_UNUSED(event);
+ return false;
+ }
+
+ bool TTestActorRuntimeBase::NopFilterFunc(TTestActorRuntimeBase& runtime, TAutoPtr<IEventHandle>& event, TDuration delay, TInstant& deadline) {
+ Y_UNUSED(runtime);
+ Y_UNUSED(delay);
+ Y_UNUSED(event);
+ Y_UNUSED(deadline);
+ return true;
+ }
+
+
+ void TTestActorRuntimeBase::DefaultRegistrationObserver(TTestActorRuntimeBase& runtime, const TActorId& parentId, const TActorId& actorId) {
+ if (runtime.ScheduleWhiteList.find(parentId) != runtime.ScheduleWhiteList.end()) {
+ runtime.ScheduleWhiteList.insert(actorId);
+ runtime.ScheduleWhiteListParent[actorId] = parentId;
+ }
+ }
+
+ class TScheduledTreeItem {
+ public:
+ TString Name;
+ ui64 Count;
+ TVector<TScheduledTreeItem> Children;
+
+ TScheduledTreeItem(const TString& name)
+ : Name(name)
+ , Count(0)
+ {}
+
+ TScheduledTreeItem* GetItem(const TString& name) {
+ TScheduledTreeItem* item = nullptr;
+ for (TScheduledTreeItem& i : Children) {
+ if (i.Name == name) {
+ item = &i;
+ break;
+ }
+ }
+ if (item != nullptr)
+ return item;
+ Children.emplace_back(name);
+ return &Children.back();
+ }
+
+ void RecursiveSort() {
+ Sort(Children, [](const TScheduledTreeItem& a, const TScheduledTreeItem& b) -> bool { return a.Count > b.Count; });
+ for (TScheduledTreeItem& item : Children) {
+ item.RecursiveSort();
+ }
+ }
+
+ void Print(IOutputStream& stream, const TString& prefix) {
+ for (auto it = Children.begin(); it != Children.end(); ++it) {
+ bool lastChild = (std::next(it) == Children.end());
+ TString connectionPrefix = lastChild ? "└─ " : "├─ ";
+ TString subChildPrefix = lastChild ? " " : "│ ";
+ stream << prefix << connectionPrefix << it->Name << " (" << it->Count << ")\n";
+ it->Print(stream, prefix + subChildPrefix);
+ }
+ }
+
+ void Print(IOutputStream& stream) {
+ stream << Name << " (" << Count << ")\n";
+ Print(stream, TString());
+ }
+ };
+
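+ // Default scheduled-events selector: jumps virtual time straight to the earliest
+ // deadline and releases every event due at exactly that instant, throwing
+ // TSchedulingLimitReachedException once ScheduledLimit events have been replayed.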
+ void TTestActorRuntimeBase::CollapsedTimeScheduledEventsSelector(TTestActorRuntimeBase& runtime, TScheduledEventsList& scheduledEvents, TEventsList& queue) {
+ if (scheduledEvents.empty())
+ return;
+
+ TInstant time = scheduledEvents.begin()->Deadline;
+ while (!scheduledEvents.empty() && scheduledEvents.begin()->Deadline == time) {
+ static THashMap<std::pair<TActorId, TString>, ui64> eventTypes;
+ auto& item = *scheduledEvents.begin();
+ TString name = item.Event->GetBase() ? TypeName(*item.Event->GetBase()) : Sprintf("%08" PRIx32, item.Event->Type);
+ eventTypes[std::make_pair(item.Event->Recipient, name)]++;
+ runtime.ScheduledCount++;
+ if (runtime.ScheduledCount > runtime.ScheduledLimit) {
+// TScheduledTreeItem root("Root");
+// TVector<TString> path;
+// for (const auto& pr : eventTypes) {
+// path.clear();
+// path.push_back(runtime.GetActorName(pr.first.first));
+// for (auto it = runtime.ScheduleWhiteListParent.find(pr.first.first); it != runtime.ScheduleWhiteListParent.end(); it = runtime.ScheduleWhiteListParent.find(it->second)) {
+// path.insert(path.begin(), runtime.GetActorName(it->second));
+// }
+// path.push_back("<" + pr.first.second + ">"); // event name;
+// ui64 count = pr.second;
+// TScheduledTreeItem* item = &root;
+// item->Count += count;
+// for (TString name : path) {
+// item = item->GetItem(name);
+// item->Count += count;
+// }
+// }
+// root.RecursiveSort();
+// root.Print(Cerr);
+
+ ythrow TSchedulingLimitReachedException(runtime.ScheduledLimit);
+ }
+ if (item.Cookie->Get()) {
+ if (item.Cookie->Detach()) {
+ queue.push_back(item.Event);
+ }
+ } else {
+ queue.push_back(item.Event);
+ }
+
+ scheduledEvents.erase(scheduledEvents.begin());
+ }
+
+ runtime.UpdateCurrentTime(time);
+ }
+
+ TTestActorRuntimeBase::TEventObserver TTestActorRuntimeBase::SetObserverFunc(TEventObserver observerFunc) {
+ TGuard<TMutex> guard(Mutex);
+ auto result = ObserverFunc;
+ ObserverFunc = observerFunc;
+ return result;
+ }
+
+ TTestActorRuntimeBase::TScheduledEventsSelector TTestActorRuntimeBase::SetScheduledEventsSelectorFunc(TScheduledEventsSelector scheduledEventsSelectorFunc) {
+ TGuard<TMutex> guard(Mutex);
+ auto result = ScheduledEventsSelectorFunc;
+ ScheduledEventsSelectorFunc = scheduledEventsSelectorFunc;
+ return result;
+ }
+
+ TTestActorRuntimeBase::TEventFilter TTestActorRuntimeBase::SetEventFilter(TEventFilter filterFunc) {
+ TGuard<TMutex> guard(Mutex);
+ auto result = EventFilterFunc;
+ EventFilterFunc = filterFunc;
+ return result;
+ }
+
+ TTestActorRuntimeBase::TScheduledEventFilter TTestActorRuntimeBase::SetScheduledEventFilter(TScheduledEventFilter filterFunc) {
+ TGuard<TMutex> guard(Mutex);
+ auto result = ScheduledEventFilterFunc;
+ ScheduledEventFilterFunc = filterFunc;
+ return result;
+ }
+
+ TTestActorRuntimeBase::TRegistrationObserver TTestActorRuntimeBase::SetRegistrationObserverFunc(TRegistrationObserver observerFunc) {
+ TGuard<TMutex> guard(Mutex);
+ auto result = RegistrationObserver;
+ RegistrationObserver = observerFunc;
+ return result;
+ }
+
+ bool TTestActorRuntimeBase::IsVerbose() {
+ return VERBOSE;
+ }
+
+ void TTestActorRuntimeBase::SetVerbose(bool verbose) {
+ VERBOSE = verbose;
+ }
+
+ void TTestActorRuntimeBase::AddLocalService(const TActorId& actorId, const TActorSetupCmd& cmd, ui32 nodeIndex) {
+ Y_VERIFY(!IsInitialized);
+ Y_VERIFY(nodeIndex < NodeCount);
+ auto node = Nodes[nodeIndex + FirstNodeId];
+ if (!node) {
+ node = GetNodeFactory().CreateNode();
+ Nodes[nodeIndex + FirstNodeId] = node;
+ }
+
+ node->LocalServicesActors[actorId] = cmd.Actor;
+ node->LocalServices.push_back(std::make_pair(actorId, cmd));
+ }
+
+ void TTestActorRuntimeBase::InitNodes() {
+ NextNodeId += NodeCount;
+ Y_VERIFY(NodeCount > 0);
+
+ for (ui32 nodeIndex = 0; nodeIndex < NodeCount; ++nodeIndex) {
+ auto nodeIt = Nodes.emplace(FirstNodeId + nodeIndex, GetNodeFactory().CreateNode()).first;
+ TNodeDataBase* node = nodeIt->second.Get();
+ InitNode(node, nodeIndex);
+ }
+ }
+
+ void TTestActorRuntimeBase::Initialize() {
+ InitNodes();
+ IsInitialized = true;
+ }
+
+ void SetupCrossDC() {
+ }
+
+ TDuration TTestActorRuntimeBase::SetDispatchTimeout(TDuration timeout) {
+ TGuard<TMutex> guard(Mutex);
+ TDuration oldTimeout = DispatchTimeout;
+ DispatchTimeout = timeout;
+ return oldTimeout;
+ }
+
+ TDuration TTestActorRuntimeBase::SetReschedulingDelay(TDuration delay) {
+ TGuard<TMutex> guard(Mutex);
+ TDuration oldDelay = ReschedulingDelay;
+ ReschedulingDelay = delay;
+ return oldDelay;
+ }
+
+ void TTestActorRuntimeBase::SetLogBackend(const TAutoPtr<TLogBackend> logBackend) {
+ Y_VERIFY(!IsInitialized);
+ TGuard<TMutex> guard(Mutex);
+ LogBackend = logBackend;
+ }
+
+ void TTestActorRuntimeBase::SetLogPriority(NActors::NLog::EComponent component, NActors::NLog::EPriority priority) {
+ TGuard<TMutex> guard(Mutex);
+ for (ui32 nodeIndex = 0; nodeIndex < NodeCount; ++nodeIndex) {
+ TNodeDataBase* node = Nodes[FirstNodeId + nodeIndex].Get();
+ TString explanation;
+ auto status = node->LogSettings->SetLevel(priority, component, explanation);
+ if (status) {
+ Y_FAIL("SetLogPriority failed: %s", explanation.c_str());
+ }
+ }
+ }
+
+ TInstant TTestActorRuntimeBase::GetCurrentTime() const {
+ TGuard<TMutex> guard(Mutex);
+ Y_VERIFY(!UseRealThreads);
+ return TInstant::MicroSeconds(CurrentTimestamp);
+ }
+
+ void TTestActorRuntimeBase::UpdateCurrentTime(TInstant newTime) {
+ static int counter = 0;
+ ++counter;
+ if (VERBOSE) {
+ Cerr << "UpdateCurrentTime(" << counter << "," << newTime << ")\n";
+ }
+ TGuard<TMutex> guard(Mutex);
+ Y_VERIFY(!UseRealThreads);
+ if (newTime.MicroSeconds() > CurrentTimestamp) {
+ CurrentTimestamp = newTime.MicroSeconds();
+ for (auto& kv : Nodes) {
+ AtomicStore(kv.second->ActorSystemTimestamp, CurrentTimestamp);
+ AtomicStore(kv.second->ActorSystemMonotonic, CurrentTimestamp);
+ }
+ }
+ }
+
+ void TTestActorRuntimeBase::AdvanceCurrentTime(TDuration duration) {
+ UpdateCurrentTime(GetCurrentTime() + duration);
+ }
+
+ TIntrusivePtr<ITimeProvider> TTestActorRuntimeBase::GetTimeProvider() {
+ Y_VERIFY(!UseRealThreads);
+ return TimeProvider;
+ }
+
+ ui32 TTestActorRuntimeBase::GetNodeId(ui32 index) const {
+ Y_VERIFY(index < NodeCount);
+ return FirstNodeId + index;
+ }
+
+ ui32 TTestActorRuntimeBase::GetNodeCount() const {
+ return NodeCount;
+ }
+
+ ui64 TTestActorRuntimeBase::AllocateLocalId() {
+ TGuard<TMutex> guard(Mutex);
+ ui64 nextId = ++LocalId;
+ if (VERBOSE) {
+ Cerr << "Allocated id: " << nextId << "\n";
+ }
+
+ return nextId;
+ }
+
+ ui32 TTestActorRuntimeBase::InterconnectPoolId() const {
+ if (UseRealThreads && NSan::TSanIsOn()) {
+ // Interconnect coroutines may move across threads
+ // Use a special single-threaded pool to avoid that
+ return 4;
+ }
+ return 0;
+ }
+
+ TString TTestActorRuntimeBase::GetTempDir() {
+ if (!TmpDir)
+ TmpDir.Reset(new TTempDir());
+ return (*TmpDir)();
+ }
+
+ TActorId TTestActorRuntimeBase::Register(IActor* actor, ui32 nodeIndex, ui32 poolId, TMailboxType::EType mailboxType,
+ ui64 revolvingCounter, const TActorId& parentId) {
+ Y_VERIFY(nodeIndex < NodeCount);
+ TGuard<TMutex> guard(Mutex);
+ TNodeDataBase* node = Nodes[FirstNodeId + nodeIndex].Get();
+ if (UseRealThreads) {
+ Y_VERIFY(poolId < node->ExecutorPools.size());
+ return node->ExecutorPools[poolId]->Register(actor, mailboxType, revolvingCounter, parentId);
+ }
+
+ // first step - find good enough mailbox
+ ui32 hint = 0;
+ TMailboxHeader *mailbox = nullptr;
+
+ {
+ ui32 hintBackoff = 0;
+
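+ // Allocate until we obtain a mailbox that can be locked; one that fails
+ // LockFromFree() is remembered in hintBackoff and reclaimed on the next pass.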
+ while (hint == 0) {
+ hint = node->MailboxTable->AllocateMailbox(mailboxType, ++revolvingCounter);
+ mailbox = node->MailboxTable->Get(hint);
+
+ if (!mailbox->LockFromFree()) {
+ node->MailboxTable->ReclaimMailbox(mailboxType, hintBackoff, ++revolvingCounter);
+ hintBackoff = hint;
+ hint = 0;
+ }
+ }
+
+ node->MailboxTable->ReclaimMailbox(mailboxType, hintBackoff, ++revolvingCounter);
+ }
+
+ const ui64 localActorId = AllocateLocalId();
+ if (VERBOSE) {
+ Cerr << "Register actor " << TypeName(*actor) << " as " << localActorId << ", mailbox: " << hint << "\n";
+ }
+
+ // ok, got mailbox
+ mailbox->AttachActor(localActorId, actor);
+
+ // do init
+ const TActorId actorId(FirstNodeId + nodeIndex, poolId, localActorId, hint);
+ ActorNames[actorId] = TypeName(*actor);
+ RegistrationObserver(*this, parentId ? parentId : CurrentRecipient, actorId);
+ DoActorInit(node->ActorSystem.Get(), actor, actorId, parentId ? parentId : CurrentRecipient);
+
+ switch (mailboxType) {
+ case TMailboxType::Simple:
+ UnlockFromExecution((TMailboxTable::TSimpleMailbox *)mailbox, node->ExecutorPools[0], false, hint, MaxWorkers, ++revolvingCounter);
+ break;
+ case TMailboxType::Revolving:
+ UnlockFromExecution((TMailboxTable::TRevolvingMailbox *)mailbox, node->ExecutorPools[0], false, hint, MaxWorkers, ++revolvingCounter);
+ break;
+ case TMailboxType::HTSwap:
+ UnlockFromExecution((TMailboxTable::THTSwapMailbox *)mailbox, node->ExecutorPools[0], false, hint, MaxWorkers, ++revolvingCounter);
+ break;
+ case TMailboxType::ReadAsFilled:
+ UnlockFromExecution((TMailboxTable::TReadAsFilledMailbox *)mailbox, node->ExecutorPools[0], false, hint, MaxWorkers, ++revolvingCounter);
+ break;
+ case TMailboxType::TinyReadAsFilled:
+ UnlockFromExecution((TMailboxTable::TTinyReadAsFilledMailbox *)mailbox, node->ExecutorPools[0], false, hint, MaxWorkers, ++revolvingCounter);
+ break;
+ default:
+ Y_FAIL("Unsupported mailbox type");
+ }
+
+ return actorId;
+ }
+
+ TActorId TTestActorRuntimeBase::Register(IActor *actor, ui32 nodeIndex, ui32 poolId, TMailboxHeader *mailbox, ui32 hint,
+ const TActorId& parentId) {
+ Y_VERIFY(nodeIndex < NodeCount);
+ TGuard<TMutex> guard(Mutex);
+ TNodeDataBase* node = Nodes[FirstNodeId + nodeIndex].Get();
+ if (UseRealThreads) {
+ Y_VERIFY(poolId < node->ExecutorPools.size());
+ return node->ExecutorPools[poolId]->Register(actor, mailbox, hint, parentId);
+ }
+
+ const ui64 localActorId = AllocateLocalId();
+ if (VERBOSE) {
+ Cerr << "Register actor " << TypeName(*actor) << " as " << localActorId << "\n";
+ }
+
+ mailbox->AttachActor(localActorId, actor);
+ const TActorId actorId(FirstNodeId + nodeIndex, poolId, localActorId, hint);
+ ActorNames[actorId] = TypeName(*actor);
+ RegistrationObserver(*this, parentId ? parentId : CurrentRecipient, actorId);
+ DoActorInit(node->ActorSystem.Get(), actor, actorId, parentId ? parentId : CurrentRecipient);
+
+ return actorId;
+ }
+
+ TActorId TTestActorRuntimeBase::RegisterService(const TActorId& serviceId, const TActorId& actorId, ui32 nodeIndex) {
+ TGuard<TMutex> guard(Mutex);
+ Y_VERIFY(nodeIndex < NodeCount);
+ TNodeDataBase* node = Nodes[FirstNodeId + nodeIndex].Get();
+ if (!UseRealThreads) {
+ IActor* actor = FindActor(actorId, node);
+ node->LocalServicesActors[serviceId] = actor;
+ node->ActorToActorId[actor] = actorId;
+ }
+
+ return node->ActorSystem->RegisterLocalService(serviceId, actorId);
+ }
+
+ TActorId TTestActorRuntimeBase::AllocateEdgeActor(ui32 nodeIndex) {
+ TGuard<TMutex> guard(Mutex);
+ Y_VERIFY(nodeIndex < NodeCount);
+ TActorId edgeActor = Register(new TEdgeActor(this), nodeIndex);
+ EdgeActors.insert(edgeActor);
+ EdgeActorByMailbox[TEventMailboxId(edgeActor.NodeId(), edgeActor.Hint())] = edgeActor;
+ return edgeActor;
+ }
+
+ TEventsList TTestActorRuntimeBase::CaptureEvents() {
+ TGuard<TMutex> guard(Mutex);
+ TEventsList result;
+ for (auto& mbox : Mailboxes) {
+ mbox.second->Capture(result);
+ }
+
+ return result;
+ }
+
+ TEventsList TTestActorRuntimeBase::CaptureMailboxEvents(ui32 hint, ui32 nodeId) {
+ TGuard<TMutex> guard(Mutex);
+ Y_VERIFY(nodeId >= FirstNodeId && nodeId < FirstNodeId + NodeCount);
+ TEventsList result;
+ GetMailbox(nodeId, hint).Capture(result);
+ return result;
+ }
+
+ void TTestActorRuntimeBase::PushFront(TAutoPtr<IEventHandle>& ev) {
+ TGuard<TMutex> guard(Mutex);
+ ui32 nodeId = ev->GetRecipientRewrite().NodeId();
+ Y_VERIFY(nodeId != 0);
+ GetMailbox(nodeId, ev->GetRecipientRewrite().Hint()).PushFront(ev);
+ }
+
+ void TTestActorRuntimeBase::PushEventsFront(TEventsList& events) {
+ TGuard<TMutex> guard(Mutex);
+ for (auto rit = events.rbegin(); rit != events.rend(); ++rit) {
+ if (*rit) {
+ auto& ev = *rit;
+ ui32 nodeId = ev->GetRecipientRewrite().NodeId();
+ Y_VERIFY(nodeId != 0);
+ GetMailbox(nodeId, ev->GetRecipientRewrite().Hint()).PushFront(ev);
+ }
+ }
+
+ events.clear();
+ }
+
+ void TTestActorRuntimeBase::PushMailboxEventsFront(ui32 hint, ui32 nodeId, TEventsList& events) {
+ TGuard<TMutex> guard(Mutex);
+ Y_VERIFY(nodeId >= FirstNodeId && nodeId < FirstNodeId + NodeCount);
+ GetMailbox(nodeId, hint).PushFront(events);
+ events.clear();
+ }
+
+ TScheduledEventsList TTestActorRuntimeBase::CaptureScheduledEvents() {
+ TGuard<TMutex> guard(Mutex);
+ TScheduledEventsList result;
+ for (auto& mbox : Mailboxes) {
+ mbox.second->CaptureScheduled(result);
+ }
+
+ return result;
+ }
+
+ bool TTestActorRuntimeBase::DispatchEvents(const TDispatchOptions& options) {
+ return DispatchEvents(options, TInstant::Max());
+ }
+
+ bool TTestActorRuntimeBase::DispatchEvents(const TDispatchOptions& options, TDuration simTimeout) {
+ return DispatchEvents(options, TInstant::MicroSeconds(CurrentTimestamp) + simTimeout);
+ }
+
+ bool TTestActorRuntimeBase::DispatchEvents(const TDispatchOptions& options, TInstant simDeadline) {
+ TGuard<TMutex> guard(Mutex);
+ return DispatchEventsInternal(options, simDeadline);
+ }
+
+ // Mutex must be locked by caller!
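+ // Dispatch outline: walk the mailboxes starting at an index drawn from
+ // DispatcherRandomProvider (deterministic per seed, yet unbiased across mailboxes),
+ // pass each event through ObserverFunc and deliver it via SendInternal; once no
+ // progress is possible, pull in scheduled events through ScheduledEventsSelectorFunc,
+ // which may also forward virtual time.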
+ bool TTestActorRuntimeBase::DispatchEventsInternal(const TDispatchOptions& options, TInstant simDeadline) {
+ TDispatchContext localContext;
+ localContext.Options = &options;
+ localContext.PrevContext = nullptr;
+ bool verbose = !options.Quiet && VERBOSE;
+
+ struct TDispatchContextSetter {
+ TDispatchContextSetter(TTestActorRuntimeBase& runtime, TDispatchContext& lastContext)
+ : Runtime(runtime)
+ {
+ lastContext.PrevContext = Runtime.CurrentDispatchContext;
+ Runtime.CurrentDispatchContext = &lastContext;
+ }
+
+ ~TDispatchContextSetter() {
+ Runtime.CurrentDispatchContext = Runtime.CurrentDispatchContext->PrevContext;
+ }
+
+ TTestActorRuntimeBase& Runtime;
+ } DispatchContextSetter(*this, localContext);
+
+ TInstant dispatchTime = TInstant::MicroSeconds(0);
+ TInstant deadline = dispatchTime + DispatchTimeout;
+ const TDuration scheduledEventsInspectInterval = TDuration::MilliSeconds(10);
+ TInstant inspectScheduledEventsAt = dispatchTime + scheduledEventsInspectInterval;
+ if (verbose) {
+ Cerr << "Start dispatch at " << TInstant::MicroSeconds(CurrentTimestamp) << ", deadline is " << deadline << "\n";
+ }
+
+ struct TTempEdgeEventsCaptor {
+ TTempEdgeEventsCaptor(TTestActorRuntimeBase& runtime)
+ : Runtime(runtime)
+ , HasEvents(false)
+ {
+ for (auto edgeActor : Runtime.EdgeActors) {
+ TEventsList events;
+ Runtime.GetMailbox(edgeActor.NodeId(), edgeActor.Hint()).Capture(events);
+ auto mboxId = TEventMailboxId(edgeActor.NodeId(), edgeActor.Hint());
+ auto storeIt = Store.find(mboxId);
+ Y_VERIFY(storeIt == Store.end());
+ storeIt = Store.insert(std::make_pair(mboxId, new TEventMailBox)).first;
+ storeIt->second->PushFront(events);
+ if (!events.empty())
+ HasEvents = true;
+ }
+ }
+
+ ~TTempEdgeEventsCaptor() {
+ for (auto edgeActor : Runtime.EdgeActors) {
+ auto mboxId = TEventMailboxId(edgeActor.NodeId(), edgeActor.Hint());
+ auto storeIt = Store.find(mboxId);
+ if (storeIt == Store.end()) {
+ continue;
+ }
+
+ TEventsList events;
+ storeIt->second->Capture(events);
+ Runtime.GetMailbox(edgeActor.NodeId(), edgeActor.Hint()).PushFront(events);
+ }
+ }
+
+ TTestActorRuntimeBase& Runtime;
+ TEventMailBoxList Store;
+ bool HasEvents;
+ };
+
+ TEventMailBoxList restrictedMailboxes;
+ const bool useRestrictedMailboxes = !options.OnlyMailboxes.empty();
+ for (auto mailboxId : options.OnlyMailboxes) {
+ auto it = Mailboxes.find(mailboxId);
+ if (it == Mailboxes.end()) {
+ it = Mailboxes.insert(std::make_pair(mailboxId, new TEventMailBox())).first;
+ }
+
+ restrictedMailboxes.insert(std::make_pair(mailboxId, it->second));
+ }
+
+ TAutoPtr<TTempEdgeEventsCaptor> tempEdgeEventsCaptor;
+ if (!useRestrictedMailboxes) {
+ tempEdgeEventsCaptor.Reset(new TTempEdgeEventsCaptor(*this));
+ }
+
+ TEventMailBoxList& currentMailboxes = useRestrictedMailboxes ? restrictedMailboxes : Mailboxes;
+ while (!currentMailboxes.empty()) {
+ bool hasProgress = true;
+ while (hasProgress) {
+ ++DispatchCyclesCount;
+ hasProgress = false;
+
+ ui64 eventsToDispatch = 0;
+ for (auto mboxIt = currentMailboxes.begin(); mboxIt != currentMailboxes.end(); ++mboxIt) {
+ if (mboxIt->second->IsActive(TInstant::MicroSeconds(CurrentTimestamp))) {
+ eventsToDispatch += mboxIt->second->GetSentEventCount();
+ }
+ }
+ ui32 eventsDispatched = 0;
+
+ //TODO: count events before each cycle, break after dispatching that many events
+ bool isEmpty = false;
+ while (!isEmpty && eventsDispatched < eventsToDispatch) {
+ ui64 mailboxCount = currentMailboxes.size();
+ ui64 startWith = mailboxCount ? DispatcherRandomProvider->GenRand64() % mailboxCount : 0ull;
+ auto startWithMboxIt = currentMailboxes.begin();
+ for (ui64 i = 0; i < startWith; ++i) {
+ ++startWithMboxIt;
+ }
+ auto endWithMboxIt = startWithMboxIt;
+
+ isEmpty = true;
+ auto mboxIt = startWithMboxIt;
+ TDeque<TEventMailboxId> suspectedBoxes;
+ while (true) {
+ auto& mbox = *mboxIt;
+ bool isIgnored = true;
+ if (!mbox.second->IsEmpty()) {
+ HandleNonEmptyMailboxesForEachContext(mbox.first);
+ if (mbox.second->IsActive(TInstant::MicroSeconds(CurrentTimestamp))) {
+
+ bool isEdgeMailbox = false;
+ if (EdgeActorByMailbox.FindPtr(TEventMailboxId(mbox.first.NodeId, mbox.first.Hint))) {
+ isEdgeMailbox = true;
+ TEventsList events;
+ mbox.second->Capture(events);
+ for (auto& ev : events) {
+ TInverseGuard<TMutex> inverseGuard(Mutex);
+ ObserverFunc(*this, ev);
+ }
+ mbox.second->PushFront(events);
+ }
+
+ if (!isEdgeMailbox) {
+ isEmpty = false;
+ isIgnored = false;
+ ++eventsDispatched;
+ ++DispatchedEventsCount;
+ if (DispatchedEventsCount > DispatchedEventsLimit) {
+ ythrow TWithBackTrace<yexception>() << "Dispatched "
+ << DispatchedEventsLimit << " events, limit reached.";
+ }
+
+ auto ev = mbox.second->Pop();
+ if (BlockedOutput.find(ev->Sender) == BlockedOutput.end()) {
+ //UpdateCurrentTime(TInstant::MicroSeconds(CurrentTimestamp + 10));
+ if (verbose) {
+ Cerr << "Process event at " << TInstant::MicroSeconds(CurrentTimestamp) << ", ";
+ PrintEvent(ev, this);
+ }
+ }
+
+ hasProgress = true;
+ EEventAction action;
+ {
+ TInverseGuard<TMutex> inverseGuard(Mutex);
+ action = ObserverFunc(*this, ev);
+ }
+
+ switch (action) {
+ case EEventAction::PROCESS:
+ UpdateFinalEventsStatsForEachContext(*ev);
+ SendInternal(ev.Release(), mbox.first.NodeId - FirstNodeId, false);
+ break;
+ case EEventAction::DROP:
+ // do nothing
+ break;
+ case EEventAction::RESCHEDULE: {
+ TInstant deadline = TInstant::MicroSeconds(CurrentTimestamp) + ReschedulingDelay;
+ mbox.second->Freeze(deadline);
+ mbox.second->PushFront(ev);
+ break;
+ }
+ default:
+ Y_FAIL("Unknown action");
+ }
+ }
+ }
+
+ }
+ Y_VERIFY(mboxIt != currentMailboxes.end());
+ if (!isIgnored && !CurrentDispatchContext->PrevContext && !useRestrictedMailboxes &&
+ mboxIt->second->IsEmpty() &&
+ mboxIt->second->IsScheduledEmpty() &&
+ mboxIt->second->IsActive(TInstant::MicroSeconds(CurrentTimestamp))) {
+ suspectedBoxes.push_back(mboxIt->first);
+ }
+ ++mboxIt;
+ if (mboxIt == currentMailboxes.end()) {
+ mboxIt = currentMailboxes.begin();
+ }
+ Y_VERIFY(endWithMboxIt != currentMailboxes.end());
+ if (mboxIt == endWithMboxIt) {
+ break;
+ }
+ }
+
+ for (auto id : suspectedBoxes) {
+ auto it = currentMailboxes.find(id);
+ if (it != currentMailboxes.end() && it->second->IsEmpty() && it->second->IsScheduledEmpty() &&
+ it->second->IsActive(TInstant::MicroSeconds(CurrentTimestamp))) {
+ currentMailboxes.erase(it);
+ }
+ }
+ }
+ }
+
+ if (localContext.FinalEventFound) {
+ return true;
+ }
+
+ if (!localContext.FoundNonEmptyMailboxes.empty())
+ return true;
+
+ if (options.CustomFinalCondition && options.CustomFinalCondition())
+ return true;
+
+ if (options.FinalEvents.empty()) {
+ for (auto& mbox : currentMailboxes) {
+ if (!mbox.second->IsActive(TInstant::MicroSeconds(CurrentTimestamp)))
+ continue;
+
+ if (!mbox.second->IsEmpty()) {
+ if (verbose) {
+ Cerr << "Dispatch complete with non-empty queue at " << TInstant::MicroSeconds(CurrentTimestamp) << "\n";
+ }
+
+ return true;
+ }
+ }
+ }
+
+ if (TInstant::MicroSeconds(CurrentTimestamp) > simDeadline) {
+ return false;
+ }
+
+ if (dispatchTime >= deadline) {
+ if (verbose) {
+ Cerr << "Reach deadline at " << TInstant::MicroSeconds(CurrentTimestamp) << "\n";
+ }
+
+ ythrow TWithBackTrace<TEmptyEventQueueException>();
+ }
+
+ if (!options.Quiet && dispatchTime >= inspectScheduledEventsAt) {
+ inspectScheduledEventsAt = dispatchTime + scheduledEventsInspectInterval;
+ bool isEmpty = true;
+ TMaybe<TInstant> nearestMailboxDeadline;
+ TVector<TIntrusivePtr<TEventMailBox>> nextScheduleMboxes;
+ TMaybe<TInstant> nextScheduleDeadline;
+ for (auto& mbox : currentMailboxes) {
+ if (!mbox.second->IsActive(TInstant::MicroSeconds(CurrentTimestamp))) {
+ if (!nearestMailboxDeadline.Defined() || *nearestMailboxDeadline.Get() > mbox.second->GetInactiveUntil()) {
+ nearestMailboxDeadline = mbox.second->GetInactiveUntil();
+ }
+
+ continue;
+ }
+
+ if (mbox.second->IsScheduledEmpty())
+ continue;
+
+ auto firstScheduleDeadline = mbox.second->GetFirstScheduleDeadline();
+ if (!nextScheduleDeadline || firstScheduleDeadline < *nextScheduleDeadline) {
+ nextScheduleMboxes.clear();
+ nextScheduleMboxes.emplace_back(mbox.second);
+ nextScheduleDeadline = firstScheduleDeadline;
+ } else if (firstScheduleDeadline == *nextScheduleDeadline) {
+ nextScheduleMboxes.emplace_back(mbox.second);
+ }
+ }
+
+ for (const auto& nextScheduleMbox : nextScheduleMboxes) {
+ TEventsList selectedEvents;
+ TScheduledEventsList capturedScheduledEvents;
+ nextScheduleMbox->CaptureScheduled(capturedScheduledEvents);
+ ScheduledEventsSelectorFunc(*this, capturedScheduledEvents, selectedEvents);
+ nextScheduleMbox->PushScheduled(capturedScheduledEvents);
+ for (auto& event : selectedEvents) {
+ if (verbose && (BlockedOutput.find(event->Sender) == BlockedOutput.end())) {
+ Cerr << "Selected scheduled event at " << TInstant::MicroSeconds(CurrentTimestamp) << ", ";
+ PrintEvent(event, this);
+ }
+
+ nextScheduleMbox->Send(event);
+ isEmpty = false;
+ }
+ }
+
+ if (!isEmpty) {
+ if (verbose) {
+ Cerr << "Process selected events at " << TInstant::MicroSeconds(CurrentTimestamp) << "\n";
+ }
+
+ deadline = dispatchTime + DispatchTimeout;
+ continue;
+ }
+
+ if (nearestMailboxDeadline.Defined()) {
+ if (verbose) {
+ Cerr << "Forward time to " << *nearestMailboxDeadline.Get() << "\n";
+ }
+
+ UpdateCurrentTime(*nearestMailboxDeadline.Get());
+ continue;
+ }
+ }
+
+ TDuration waitDelay = TDuration::MilliSeconds(10);
+ dispatchTime += waitDelay;
+ MailboxesHasEvents.WaitT(Mutex, waitDelay);
+ }
+ return false;
+ }
+
+ void TTestActorRuntimeBase::HandleNonEmptyMailboxesForEachContext(TEventMailboxId mboxId) {
+ TDispatchContext* context = CurrentDispatchContext;
+ while (context) {
+ const auto& nonEmptyMailboxes = context->Options->NonEmptyMailboxes;
+ if (Find(nonEmptyMailboxes.begin(), nonEmptyMailboxes.end(), mboxId) != nonEmptyMailboxes.end()) {
+ context->FoundNonEmptyMailboxes.insert(mboxId);
+ }
+
+ context = context->PrevContext;
+ }
+ }
+
+ void TTestActorRuntimeBase::UpdateFinalEventsStatsForEachContext(IEventHandle& ev) {
+ TDispatchContext* context = CurrentDispatchContext;
+ while (context) {
+ for (const auto& finalEvent : context->Options->FinalEvents) {
+ if (finalEvent.EventCheck(ev)) {
+ auto& freq = context->FinalEventFrequency[&finalEvent];
+ if (++freq >= finalEvent.RequiredCount) {
+ context->FinalEventFound = true;
+ }
+ }
+ }
+
+ context = context->PrevContext;
+ }
+ }
+
+ void TTestActorRuntimeBase::Send(IEventHandle* ev, ui32 senderNodeIndex, bool viaActorSystem) {
+ TGuard<TMutex> guard(Mutex);
+ Y_VERIFY(senderNodeIndex < NodeCount, "senderNodeIndex# %" PRIu32 " < NodeCount# %" PRIu32,
+ senderNodeIndex, NodeCount);
+ SendInternal(ev, senderNodeIndex, viaActorSystem);
+ }
+
+ void TTestActorRuntimeBase::Schedule(IEventHandle* ev, const TDuration& duration, ui32 nodeIndex) {
+ TGuard<TMutex> guard(Mutex);
+ Y_VERIFY(nodeIndex < NodeCount);
+ ui32 nodeId = FirstNodeId + nodeIndex;
+ ui32 mailboxHint = ev->GetRecipientRewrite().Hint();
+ TInstant deadline = TInstant::MicroSeconds(CurrentTimestamp) + duration;
+ GetMailbox(nodeId, mailboxHint).Schedule(TScheduledEventQueueItem(deadline, ev, nullptr));
+ if (VERBOSE)
+ Cerr << "Event was added to scheduled queue\n";
+ }
+
+ void TTestActorRuntimeBase::ClearCounters() {
+ TGuard<TMutex> guard(Mutex);
+ EvCounters.clear();
+ }
+
+ ui64 TTestActorRuntimeBase::GetCounter(ui32 evType) const {
+ TGuard<TMutex> guard(Mutex);
+ auto it = EvCounters.find(evType);
+ if (it == EvCounters.end())
+ return 0;
+
+ return it->second;
+ }
+
+ TActorId TTestActorRuntimeBase::GetLocalServiceId(const TActorId& serviceId, ui32 nodeIndex) {
+ TGuard<TMutex> guard(Mutex);
+ Y_VERIFY(nodeIndex < NodeCount);
+ TNodeDataBase* node = Nodes[FirstNodeId + nodeIndex].Get();
+ return node->ActorSystem->LookupLocalService(serviceId);
+ }
+
+ void TTestActorRuntimeBase::WaitForEdgeEvents(TEventFilter filter, const TSet<TActorId>& edgeFilter, TDuration simTimeout) {
+ TGuard<TMutex> guard(Mutex);
+ ui32 dispatchCount = 0;
+ if (!edgeFilter.empty()) {
+ for (auto edgeActor : edgeFilter) {
+ Y_VERIFY(EdgeActors.contains(edgeActor), "%s is not an edge actor", ToString(edgeActor).data());
+ }
+ }
+ const TSet<TActorId>& edgeActors = edgeFilter.empty() ? EdgeActors : edgeFilter;
+ TInstant deadline = TInstant::MicroSeconds(CurrentTimestamp) + simTimeout;
+ for (;;) {
+ for (auto edgeActor : edgeActors) {
+ TEventsList events;
+ auto& mbox = GetMailbox(edgeActor.NodeId(), edgeActor.Hint());
+ bool foundEvent = false;
+ mbox.Capture(events);
+ for (auto& ev : events) {
+ if (filter(*this, ev)) {
+ foundEvent = true;
+ break;
+ }
+ }
+
+ mbox.PushFront(events);
+ if (foundEvent)
+ return;
+ }
+
+ ++dispatchCount;
+ {
+ if (!DispatchEventsInternal(TDispatchOptions(), deadline)) {
+ return; // Timed out; event was not found
+ }
+ }
+
+ Y_VERIFY(dispatchCount < 1000, "Hard limit to prevent endless loop");
+ }
+ }
+
+ TActorId TTestActorRuntimeBase::GetInterconnectProxy(ui32 nodeIndexFrom, ui32 nodeIndexTo) {
+ TGuard<TMutex> guard(Mutex);
+ Y_VERIFY(nodeIndexFrom < NodeCount);
+ Y_VERIFY(nodeIndexTo < NodeCount);
+ Y_VERIFY(nodeIndexFrom != nodeIndexTo);
+ TNodeDataBase* node = Nodes[FirstNodeId + nodeIndexFrom].Get();
+ return node->ActorSystem->InterconnectProxy(FirstNodeId + nodeIndexTo);
+ }
+
+ void TTestActorRuntimeBase::BlockOutputForActor(const TActorId& actorId) {
+ TGuard<TMutex> guard(Mutex);
+ BlockedOutput.insert(actorId);
+ }
+
+ void TTestActorRuntimeBase::SetDispatcherRandomSeed(TInstant time, ui64 iteration) {
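+ // The seed depends only on the calendar day and the iteration number, so a given
+ // iteration replays the same pseudo-random dispatch order throughout the day.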
+ ui64 days = (time.Hours() / 24);
+ DispatcherRandomSeed = (days << 32) ^ iteration;
+ DispatcherRandomProvider = CreateDeterministicRandomProvider(DispatcherRandomSeed);
+ }
+
+ IActor* TTestActorRuntimeBase::FindActor(const TActorId& actorId, ui32 nodeIndex) const {
+ TGuard<TMutex> guard(Mutex);
+ if (nodeIndex == Max<ui32>()) {
+ Y_VERIFY(actorId.NodeId());
+ nodeIndex = actorId.NodeId() - FirstNodeId;
+ }
+
+ Y_VERIFY(nodeIndex < NodeCount);
+ auto nodeIt = Nodes.find(FirstNodeId + nodeIndex);
+ Y_VERIFY(nodeIt != Nodes.end());
+ TNodeDataBase* node = nodeIt->second.Get();
+ return FindActor(actorId, node);
+ }
+
+ void TTestActorRuntimeBase::EnableScheduleForActor(const TActorId& actorId, bool allow) {
+ TGuard<TMutex> guard(Mutex);
+ if (allow) {
+ if (VERBOSE) {
+ Cerr << "Actor " << actorId << " added to schedule whitelist";
+ }
+ ScheduleWhiteList.insert(actorId);
+ } else {
+ if (VERBOSE) {
+ Cerr << "Actor " << actorId << " removed from schedule whitelist";
+ }
+ ScheduleWhiteList.erase(actorId);
+ }
+ }
+
+ bool TTestActorRuntimeBase::IsScheduleForActorEnabled(const TActorId& actorId) const {
+ TGuard<TMutex> guard(Mutex);
+ return ScheduleWhiteList.find(actorId) != ScheduleWhiteList.end();
+ }
+
+ TIntrusivePtr<NMonitoring::TDynamicCounters> TTestActorRuntimeBase::GetDynamicCounters(ui32 nodeIndex) {
+ TGuard<TMutex> guard(Mutex);
+ Y_VERIFY(nodeIndex < NodeCount);
+ ui32 nodeId = FirstNodeId + nodeIndex;
+ TNodeDataBase* node = Nodes[nodeId].Get();
+ return node->DynamicCounters;
+ }
+
+ void TTestActorRuntimeBase::SetupMonitoring() {
+ NeedMonitoring = true;
+ }
+
+ void TTestActorRuntimeBase::SendInternal(IEventHandle* ev, ui32 nodeIndex, bool viaActorSystem) {
+ Y_VERIFY(nodeIndex < NodeCount);
+ ui32 nodeId = FirstNodeId + nodeIndex;
+ TNodeDataBase* node = Nodes[nodeId].Get();
+ ui32 targetNode = ev->GetRecipientRewrite().NodeId();
+ ui32 targetNodeIndex;
+ if (targetNode == 0) {
+ targetNodeIndex = nodeIndex;
+ } else {
+ targetNodeIndex = targetNode - FirstNodeId;
+ Y_VERIFY(targetNodeIndex < NodeCount);
+ }
+
+ if (viaActorSystem || UseRealThreads || ev->GetRecipientRewrite().IsService() || (targetNodeIndex != nodeIndex)) {
+ node->ActorSystem->Send(ev);
+ return;
+ }
+
+ Y_VERIFY(!ev->GetRecipientRewrite().IsService() && (targetNodeIndex == nodeIndex));
+ TAutoPtr<IEventHandle> evHolder(ev);
+
+ if (!AllowSendFrom(node, evHolder)) {
+ return;
+ }
+
+ ui32 mailboxHint = ev->GetRecipientRewrite().Hint();
+ TEventMailBox& mbox = GetMailbox(nodeId, mailboxHint);
+ if (!mbox.IsActive(TInstant::MicroSeconds(CurrentTimestamp))) {
+ mbox.PushFront(evHolder);
+ return;
+ }
+
+ ui64 recipientLocalId = ev->GetRecipientRewrite().LocalId();
+ if ((BlockedOutput.find(ev->Sender) == BlockedOutput.end()) && VERBOSE) {
+ Cerr << "Send event, ";
+ PrintEvent(evHolder, this);
+ }
+
+ EvCounters[ev->GetTypeRewrite()]++;
+
+ TMailboxHeader* mailbox = node->MailboxTable->Get(mailboxHint);
+ IActor* recipientActor = mailbox->FindActor(recipientLocalId);
+ if (recipientActor) {
+ // Save actorId by value in order to prevent ctx from being invalidated during another Send call.
+ TActorId actorId = ev->GetRecipientRewrite();
+ node->ActorToActorId[recipientActor] = ev->GetRecipientRewrite();
+ TActorContext ctx(*mailbox, *node->ExecutorThread, GetCycleCountFast(), actorId);
+ TActivationContext *prevTlsActivationContext = TlsActivationContext;
+ TlsActivationContext = &ctx;
+ CurrentRecipient = actorId;
+ {
+ TInverseGuard<TMutex> inverseGuard(Mutex);
+#ifdef USE_ACTOR_CALLSTACK
+ TCallstack::GetTlsCallstack() = ev->Callstack;
+ TCallstack::GetTlsCallstack().SetLinesToSkip();
+#endif
+ recipientActor->Receive(evHolder, ctx);
+ node->ExecutorThread->DropUnregistered();
+ }
+ CurrentRecipient = TActorId();
+ TlsActivationContext = prevTlsActivationContext;
+ } else {
+ if (VERBOSE) {
+ Cerr << "Failed to find actor with local id: " << recipientLocalId << "\n";
+ }
+
+ auto forwardedEv = ev->ForwardOnNondelivery(TEvents::TEvUndelivered::ReasonActorUnknown);
+ if (!!forwardedEv) {
+ node->ActorSystem->Send(forwardedEv);
+ }
+ }
+ }
+
+ IActor* TTestActorRuntimeBase::FindActor(const TActorId& actorId, TNodeDataBase* node) const {
+ ui32 mailboxHint = actorId.Hint();
+ ui64 localId = actorId.LocalId();
+ TMailboxHeader* mailbox = node->MailboxTable->Get(mailboxHint);
+ IActor* actor = mailbox->FindActor(localId);
+ return actor;
+ }
+
+ THolder<TActorSystemSetup> TTestActorRuntimeBase::MakeActorSystemSetup(ui32 nodeIndex, TNodeDataBase* node) {
+ THolder<TActorSystemSetup> setup(new TActorSystemSetup);
+ setup->NodeId = FirstNodeId + nodeIndex;
+
+ if (UseRealThreads) {
+ setup->ExecutorsCount = 5;
+ setup->Executors.Reset(new TAutoPtr<IExecutorPool>[5]);
+ setup->Executors[0].Reset(new TBasicExecutorPool(0, 2, 20));
+ setup->Executors[1].Reset(new TBasicExecutorPool(1, 2, 20));
+ setup->Executors[2].Reset(new TIOExecutorPool(2, 1));
+ setup->Executors[3].Reset(new TBasicExecutorPool(3, 2, 20));
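+ // Pool 4 is deliberately single-threaded: InterconnectPoolId() routes interconnect
+ // actors here under TSan, where coroutines must not migrate between threads.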
+ setup->Executors[4].Reset(new TBasicExecutorPool(4, 1, 20));
+ setup->Scheduler.Reset(new TBasicSchedulerThread(TSchedulerConfig(512, 100)));
+ } else {
+ setup->ExecutorsCount = 1;
+ setup->Scheduler.Reset(new TSchedulerThreadStub(this, node));
+ setup->Executors.Reset(new TAutoPtr<IExecutorPool>[1]);
+ setup->Executors[0].Reset(new TExecutorPoolStub(this, nodeIndex, node, 0));
+ }
+
+ InitActorSystemSetup(*setup);
+
+ return setup;
+ }
+
+ THolder<TActorSystem> TTestActorRuntimeBase::MakeActorSystem(ui32 nodeIndex, TNodeDataBase* node) {
+ auto setup = MakeActorSystemSetup(nodeIndex, node);
+
+ node->ExecutorPools.resize(setup->ExecutorsCount);
+ for (ui32 i = 0; i < setup->ExecutorsCount; ++i) {
+ node->ExecutorPools[i] = setup->Executors[i].Get();
+ }
+
+ const auto& interconnectCounters = GetCountersForComponent(node->DynamicCounters, "interconnect");
+
+ setup->LocalServices = node->LocalServices;
+ setup->Interconnect.ProxyActors.resize(FirstNodeId + NodeCount);
+ const TActorId nameserviceId = GetNameserviceActorId();
+
+ TIntrusivePtr<TInterconnectProxyCommon> common;
+ common.Reset(new TInterconnectProxyCommon);
+ common->NameserviceId = nameserviceId;
+ common->MonCounters = interconnectCounters;
+ common->TechnicalSelfHostName = "::1";
+
+ if (!UseRealThreads) {
+ common->Settings.DeadPeer = TDuration::Max();
+ common->Settings.CloseOnIdle = TDuration::Max();
+ common->Settings.PingPeriod = TDuration::Max();
+ common->Settings.ForceConfirmPeriod = TDuration::Max();
+ common->Settings.Handshake = TDuration::Max();
+ }
+
+ common->ClusterUUID = ClusterUUID;
+ common->AcceptUUID = {ClusterUUID};
+
+ for (ui32 proxyNodeIndex = 0; proxyNodeIndex < NodeCount; ++proxyNodeIndex) {
+ if (proxyNodeIndex == nodeIndex)
+ continue;
+
+ const ui32 peerNodeId = FirstNodeId + proxyNodeIndex;
+
+ IActor *proxyActor = UseRealInterconnect
+ ? new TInterconnectProxyTCP(peerNodeId, common)
+ : InterconnectMock.CreateProxyMock(setup->NodeId, peerNodeId, common);
+
+ setup->Interconnect.ProxyActors[peerNodeId] = {proxyActor, TMailboxType::ReadAsFilled, InterconnectPoolId()};
+ }
+
+ setup->Interconnect.ProxyWrapperFactory = CreateProxyWrapperFactory(common, InterconnectPoolId(), &InterconnectMock);
+
+ if (UseRealInterconnect) {
+ setup->LocalServices.emplace_back(MakePollerActorId(), NActors::TActorSetupCmd(CreatePollerActor(),
+ NActors::TMailboxType::Simple, InterconnectPoolId()));
+ }
+
+ if (!SingleSysEnv) { // Single system env should do this self
+ TAutoPtr<TLogBackend> logBackend = LogBackend ? LogBackend : NActors::CreateStderrBackend();
+ NActors::TLoggerActor *loggerActor = new NActors::TLoggerActor(node->LogSettings,
+ logBackend, GetCountersForComponent(node->DynamicCounters, "utils"));
+ NActors::TActorSetupCmd loggerActorCmd(loggerActor, NActors::TMailboxType::Simple, node->GetLoggerPoolId());
+ std::pair<NActors::TActorId, NActors::TActorSetupCmd> loggerActorPair(node->LogSettings->LoggerActorId, loggerActorCmd);
+ setup->LocalServices.push_back(loggerActorPair);
+ }
+
+ return THolder<TActorSystem>(new TActorSystem(setup, node->GetAppData(), node->LogSettings));
+ }
+
+ TActorSystem* TTestActorRuntimeBase::SingleSys() const {
+ Y_VERIFY(Nodes.size() == 1, "Works only for single system env");
+
+ return Nodes.begin()->second->ActorSystem.Get();
+ }
+
+ TActorSystem* TTestActorRuntimeBase::GetAnyNodeActorSystem() {
+ for (auto& x : Nodes) {
+ return x.second->ActorSystem.Get();
+ }
+ Y_FAIL("Don't use this method.");
+ }
+
+ TActorSystem* TTestActorRuntimeBase::GetActorSystem(ui32 nodeId) {
+ auto it = Nodes.find(GetNodeId(nodeId));
+ Y_VERIFY(it != Nodes.end());
+ return it->second->ActorSystem.Get();
+ }
+
+
+ TEventMailBox& TTestActorRuntimeBase::GetMailbox(ui32 nodeId, ui32 hint) {
+ TGuard<TMutex> guard(Mutex);
+ auto mboxId = TEventMailboxId(nodeId, hint);
+ auto it = Mailboxes.find(mboxId);
+ if (it == Mailboxes.end()) {
+ it = Mailboxes.insert(std::make_pair(mboxId, new TEventMailBox())).first;
+ }
+
+ return *it->second;
+ }
+
+ void TTestActorRuntimeBase::ClearMailbox(ui32 nodeId, ui32 hint) {
+ TGuard<TMutex> guard(Mutex);
+ auto mboxId = TEventMailboxId(nodeId, hint);
+ Mailboxes.erase(mboxId);
+ }
+
+ TString TTestActorRuntimeBase::GetActorName(const TActorId& actorId) const {
+ auto it = ActorNames.find(actorId);
+ if (it != ActorNames.end())
+ return it->second;
+ return actorId.ToString();
+ }
+
+ struct TStrandingActorDecoratorContext : public TThrRefBase {
+ TStrandingActorDecoratorContext()
+ : Queue(new TQueueType)
+ {
+ }
+
+ typedef TOneOneQueueInplace<IEventHandle*, 32> TQueueType;
+ TAutoPtr<TQueueType, TQueueType::TPtrCleanDestructor> Queue;
+ };
+
+ class TStrandingActorDecorator : public TActorBootstrapped<TStrandingActorDecorator> {
+ public:
+ class TReplyActor : public TActor<TReplyActor> {
+ public:
+ static constexpr EActivityType ActorActivityType() {
+ return TEST_ACTOR_RUNTIME;
+ }
+
+ TReplyActor(TStrandingActorDecorator* owner)
+ : TActor(&TReplyActor::StateFunc)
+ , Owner(owner)
+ {
+ }
+
+ STFUNC(StateFunc);
+
+ private:
+ TStrandingActorDecorator* const Owner;
+ };
+
+ static constexpr EActivityType ActorActivityType() {
+ return TEST_ACTOR_RUNTIME;
+ }
+
+ TStrandingActorDecorator(const TActorId& delegatee, bool isSync, const TVector<TActorId>& additionalActors,
+ TSimpleSharedPtr<TStrandingActorDecoratorContext> context, TTestActorRuntimeBase* runtime,
+ TReplyCheckerCreator createReplyChecker)
+ : Delegatee(delegatee)
+ , IsSync(isSync)
+ , AdditionalActors(additionalActors)
+ , Context(context)
+ , HasReply(false)
+ , Runtime(runtime)
+ , ReplyChecker(createReplyChecker())
+ {
+ if (IsSync) {
+ Y_VERIFY(!runtime->IsRealThreads());
+ }
+ }
+
+ void Bootstrap(const TActorContext& ctx) {
+ Become(&TStrandingActorDecorator::StateFunc);
+ ReplyId = ctx.RegisterWithSameMailbox(new TReplyActor(this));
+ DelegateeOptions.OnlyMailboxes.push_back(TEventMailboxId(Delegatee.NodeId(), Delegatee.Hint()));
+ for (const auto& actor : AdditionalActors) {
+ DelegateeOptions.OnlyMailboxes.push_back(TEventMailboxId(actor.NodeId(), actor.Hint()));
+ }
+
+ DelegateeOptions.OnlyMailboxes.push_back(TEventMailboxId(ReplyId.NodeId(), ReplyId.Hint()));
+ DelegateeOptions.NonEmptyMailboxes.push_back(TEventMailboxId(ReplyId.NodeId(), ReplyId.Hint()));
+ DelegateeOptions.Quiet = true;
+ }
+
+ STFUNC(StateFunc) {
+ bool wasEmpty = !Context->Queue->Head();
+ Context->Queue->Push(ev.Release());
+ if (wasEmpty) {
+ SendHead(ctx);
+ }
+ }
+
+ STFUNC(Reply) {
+ Y_VERIFY(!HasReply);
+ IEventHandle *requestEv = Context->Queue->Head();
+ TActorId originalSender = requestEv->Sender;
+ HasReply = !ReplyChecker->IsWaitingForMoreResponses(ev.Get());
+ if (HasReply) {
+ delete Context->Queue->Pop();
+ }
+ ctx.ExecutorThread.Send(ev->Forward(originalSender));
+ if (!IsSync && Context->Queue->Head()) {
+ SendHead(ctx);
+ }
+ }
+
+ private:
+ void SendHead(const TActorContext& ctx) {
+ if (!IsSync) {
+ ctx.ExecutorThread.Send(GetForwardedEvent().Release());
+ } else {
+ while (Context->Queue->Head()) {
+ HasReply = false;
+ ctx.ExecutorThread.Send(GetForwardedEvent().Release());
+ int count = 100;
+ while (!HasReply && count > 0) {
+ try {
+ Runtime->DispatchEvents(DelegateeOptions);
+ } catch (TEmptyEventQueueException&) {
+ count--;
+ Cerr << "No reply" << Endl;
+ }
+ }
+
+ Runtime->UpdateCurrentTime(Runtime->GetCurrentTime() + TDuration::MicroSeconds(1000));
+ }
+ }
+ }
+
+ TAutoPtr<IEventHandle> GetForwardedEvent() {
+ IEventHandle* ev = Context->Queue->Head();
+ ReplyChecker->OnRequest(ev);
+ TAutoPtr<IEventHandle> forwardedEv = ev->HasEvent()
+ ? new IEventHandle(Delegatee, ReplyId, ev->ReleaseBase().Release(), ev->Flags, ev->Cookie)
+ : new IEventHandle(ev->GetTypeRewrite(), ev->Flags, Delegatee, ReplyId, ev->ReleaseChainBuffer(), ev->Cookie);
+
+ return forwardedEv;
+ }
+ private:
+ const TActorId Delegatee;
+ const bool IsSync;
+ const TVector<TActorId> AdditionalActors;
+ TSimpleSharedPtr<TStrandingActorDecoratorContext> Context;
+ TActorId ReplyId;
+ bool HasReply;
+ TDispatchOptions DelegateeOptions;
+ TTestActorRuntimeBase* Runtime;
+ THolder<IReplyChecker> ReplyChecker;
+ };
+
+ void TStrandingActorDecorator::TReplyActor::StateFunc(STFUNC_SIG) {
+ Owner->Reply(ev, ctx);
+ }
+
+ class TStrandingDecoratorFactory : public IStrandingDecoratorFactory {
+ public:
+ TStrandingDecoratorFactory(TTestActorRuntimeBase* runtime,
+ TReplyCheckerCreator createReplyChecker)
+ : Context(new TStrandingActorDecoratorContext())
+ , Runtime(runtime)
+ , CreateReplyChecker(createReplyChecker)
+ {
+ }
+
+ IActor* Wrap(const TActorId& delegatee, bool isSync, const TVector<TActorId>& additionalActors) override {
+ return new TStrandingActorDecorator(delegatee, isSync, additionalActors, Context, Runtime,
+ CreateReplyChecker);
+ }
+
+ private:
+ TSimpleSharedPtr<TStrandingActorDecoratorContext> Context;
+ TTestActorRuntimeBase* Runtime;
+ TReplyCheckerCreator CreateReplyChecker;
+ };
+
+ TAutoPtr<IStrandingDecoratorFactory> CreateStrandingDecoratorFactory(TTestActorRuntimeBase* runtime,
+ TReplyCheckerCreator createReplyChecker) {
+ return TAutoPtr<IStrandingDecoratorFactory>(new TStrandingDecoratorFactory(runtime, createReplyChecker));
+ }
+
+ ui64 DefaultRandomSeed = 9999;
+}
diff --git a/library/cpp/actors/testlib/test_runtime.h b/library/cpp/actors/testlib/test_runtime.h
new file mode 100644
index 0000000000..26e3b45c98
--- /dev/null
+++ b/library/cpp/actors/testlib/test_runtime.h
@@ -0,0 +1,716 @@
+#pragma once
+
+#include <library/cpp/actors/core/actor.h>
+#include <library/cpp/actors/core/actorsystem.h>
+#include <library/cpp/actors/core/log.h>
+#include <library/cpp/actors/core/events.h>
+#include <library/cpp/actors/core/executor_thread.h>
+#include <library/cpp/actors/core/mailbox.h>
+#include <library/cpp/actors/util/should_continue.h>
+#include <library/cpp/actors/interconnect/poller_tcp.h>
+#include <library/cpp/actors/interconnect/mock/ic_mock.h>
+#include <library/cpp/random_provider/random_provider.h>
+#include <library/cpp/time_provider/time_provider.h>
+#include <library/cpp/testing/unittest/tests_data.h>
+
+#include <util/datetime/base.h>
+#include <util/folder/tempdir.h>
+#include <util/generic/deque.h>
+#include <util/generic/hash.h>
+#include <util/generic/noncopyable.h>
+#include <util/generic/ptr.h>
+#include <util/generic/queue.h>
+#include <util/generic/set.h>
+#include <util/generic/vector.h>
+#include <util/system/defaults.h>
+#include <util/system/mutex.h>
+#include <util/system/condvar.h>
+#include <util/system/thread.h>
+#include <util/system/sanitizers.h>
+#include <util/system/valgrind.h>
+#include <utility>
+
+#include <functional>
+
+const TDuration DEFAULT_DISPATCH_TIMEOUT = NSan::PlainOrUnderSanitizer(
+ NValgrind::PlainOrUnderValgrind(TDuration::Seconds(60), TDuration::Seconds(120)),
+ TDuration::Seconds(120)
+);
+
+
+namespace NActors {
+ struct THeSingleSystemEnv { };
+
+ struct TEventMailboxId {
+ TEventMailboxId()
+ : NodeId(0)
+ , Hint(0)
+ {
+ }
+
+ TEventMailboxId(ui32 nodeId, ui32 hint)
+ : NodeId(nodeId)
+ , Hint(hint)
+ {
+ }
+
+ bool operator<(const TEventMailboxId& other) const {
+ return (NodeId < other.NodeId) || ((NodeId == other.NodeId) && (Hint < other.Hint));
+ }
+
+ bool operator==(const TEventMailboxId& other) const {
+ return (NodeId == other.NodeId) && (Hint == other.Hint);
+ }
+
+ struct THash {
+ ui64 operator()(const TEventMailboxId& mboxId) const noexcept {
+ return mboxId.NodeId * 31ULL + mboxId.Hint;
+ }
+ };
+
+ ui32 NodeId;
+ ui32 Hint;
+ };
+
+ struct TDispatchOptions {
+ struct TFinalEventCondition {
+ std::function<bool(IEventHandle& ev)> EventCheck;
+ ui32 RequiredCount;
+
+ TFinalEventCondition(ui32 eventType, ui32 requiredCount = 1)
+ : EventCheck([eventType](IEventHandle& ev) -> bool { return ev.GetTypeRewrite() == eventType; })
+ , RequiredCount(requiredCount)
+ {
+ }
+
+ TFinalEventCondition(std::function<bool(IEventHandle& ev)> eventCheck, ui32 requiredCount = 1)
+ : EventCheck(eventCheck)
+ , RequiredCount(requiredCount)
+ {
+ }
+ };
+
+ TVector<TFinalEventCondition> FinalEvents;
+ TVector<TEventMailboxId> NonEmptyMailboxes;
+ TVector<TEventMailboxId> OnlyMailboxes;
+ std::function<bool()> CustomFinalCondition;
+ bool Quiet = false;
+ };
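+
+ // A minimal usage sketch (illustrative only; TEvExample is a hypothetical
+ // event type): run the dispatch loop until two such events are observed.
+ //
+ //   TDispatchOptions options;
+ //   options.FinalEvents.emplace_back(TEvExample::EventType, /* requiredCount */ 2);
+ //   runtime.DispatchEvents(options);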
+
+ struct TScheduledEventQueueItem {
+ TInstant Deadline;
+ TAutoPtr<IEventHandle> Event;
+ TAutoPtr<TSchedulerCookieHolder> Cookie;
+ ui64 UniqueId;
+
+ TScheduledEventQueueItem(TInstant deadline, TAutoPtr<IEventHandle> event, ISchedulerCookie* cookie)
+ : Deadline(deadline)
+ , Event(event)
+ , Cookie(new TSchedulerCookieHolder(cookie))
+ , UniqueId(++NextUniqueId)
+ {}
+
+ bool operator<(const TScheduledEventQueueItem& other) const {
+ if (Deadline < other.Deadline)
+ return true;
+
+ if (Deadline > other.Deadline)
+ return false;
+
+ return UniqueId < other.UniqueId;
+ }
+
+ static ui64 NextUniqueId;
+ };
+
+ typedef TDeque<TAutoPtr<IEventHandle>> TEventsList;
+ typedef TSet<TScheduledEventQueueItem> TScheduledEventsList;
+
+ class TEventMailBox : public TThrRefBase {
+ public:
+ TEventMailBox()
+ : InactiveUntil(TInstant::MicroSeconds(0))
+#ifdef DEBUG_ORDER_EVENTS
+ , ExpectedReceive(0)
+ , NextToSend(0)
+#endif
+ {
+ }
+
+ void Send(TAutoPtr<IEventHandle> ev);
+ bool IsEmpty() const;
+ TAutoPtr<IEventHandle> Pop();
+ void Capture(TEventsList& evList);
+ void PushFront(TAutoPtr<IEventHandle>& ev);
+ void PushFront(TEventsList& evList);
+ void CaptureScheduled(TScheduledEventsList& evList);
+ void PushScheduled(TScheduledEventsList& evList);
+ bool IsActive(const TInstant& currentTime) const;
+ void Freeze(const TInstant& deadline);
+ TInstant GetInactiveUntil() const;
+ void Schedule(const TScheduledEventQueueItem& item);
+ bool IsScheduledEmpty() const;
+ TInstant GetFirstScheduleDeadline() const;
+ ui64 GetSentEventCount() const;
+
+ private:
+ TScheduledEventsList Scheduled;
+ TInstant InactiveUntil;
+ TEventsList Sent;
+#ifdef DEBUG_ORDER_EVENTS
+ TMap<IEventHandle*, ui64> TrackSent;
+ ui64 ExpectedReceive;
+ ui64 NextToSend;
+#endif
+ };
+
+ typedef THashMap<TEventMailboxId, TIntrusivePtr<TEventMailBox>, TEventMailboxId::THash> TEventMailBoxList;
+
+ class TEmptyEventQueueException : public yexception {
+ public:
+ TEmptyEventQueueException() {
+ Append("Event queue is still empty.");
+ }
+ };
+
+ class TSchedulingLimitReachedException : public yexception {
+ public:
+ TSchedulingLimitReachedException(ui64 limit) {
+ TStringStream str;
+ str << "TestActorRuntime Processed over " << limit << " events.";
+ Append(str.Str());
+ }
+ };
+
+ class TTestActorRuntimeBase: public TNonCopyable {
+ public:
+ class TEdgeActor;
+ class TSchedulerThreadStub;
+ class TExecutorPoolStub;
+ class TTimeProvider;
+
+ enum class EEventAction {
+ PROCESS,
+ DROP,
+ RESCHEDULE
+ };
+
+ typedef std::function<EEventAction(TTestActorRuntimeBase& runtime, TAutoPtr<IEventHandle>& event)> TEventObserver;
+ typedef std::function<void(TTestActorRuntimeBase& runtime, TScheduledEventsList& scheduledEvents, TEventsList& queue)> TScheduledEventsSelector;
+ typedef std::function<bool(TTestActorRuntimeBase& runtime, TAutoPtr<IEventHandle>& event)> TEventFilter;
+ typedef std::function<bool(TTestActorRuntimeBase& runtime, TAutoPtr<IEventHandle>& event, TDuration delay, TInstant& deadline)> TScheduledEventFilter;
+ typedef std::function<void(TTestActorRuntimeBase& runtime, const TActorId& parentId, const TActorId& actorId)> TRegistrationObserver;
+
+
+ TTestActorRuntimeBase(THeSingleSystemEnv);
+ TTestActorRuntimeBase(ui32 nodeCount, ui32 dataCenterCount, bool useRealThreads);
+ TTestActorRuntimeBase(ui32 nodeCount, ui32 dataCenterCount);
+ TTestActorRuntimeBase(ui32 nodeCount = 1, bool useRealThreads = false);
+ virtual ~TTestActorRuntimeBase();
+ bool IsRealThreads() const;
+ static EEventAction DefaultObserverFunc(TTestActorRuntimeBase& runtime, TAutoPtr<IEventHandle>& event);
+ static void DroppingScheduledEventsSelector(TTestActorRuntimeBase& runtime, TScheduledEventsList& scheduledEvents, TEventsList& queue);
+ static void CollapsedTimeScheduledEventsSelector(TTestActorRuntimeBase& runtime, TScheduledEventsList& scheduledEvents, TEventsList& queue);
+ static bool DefaultFilterFunc(TTestActorRuntimeBase& runtime, TAutoPtr<IEventHandle>& event);
+ static bool NopFilterFunc(TTestActorRuntimeBase& runtime, TAutoPtr<IEventHandle>& event, TDuration delay, TInstant& deadline);
+ static void DefaultRegistrationObserver(TTestActorRuntimeBase& runtime, const TActorId& parentId, const TActorId& actorId);
+ TEventObserver SetObserverFunc(TEventObserver observerFunc);
+ TScheduledEventsSelector SetScheduledEventsSelectorFunc(TScheduledEventsSelector scheduledEventsSelectorFunc);
+ TEventFilter SetEventFilter(TEventFilter filterFunc);
+ TScheduledEventFilter SetScheduledEventFilter(TScheduledEventFilter filterFunc);
+ TRegistrationObserver SetRegistrationObserverFunc(TRegistrationObserver observerFunc);
+ static bool IsVerbose();
+ static void SetVerbose(bool verbose);
+ TDuration SetDispatchTimeout(TDuration timeout);
+ void SetDispatchedEventsLimit(ui64 limit) {
+ DispatchedEventsLimit = limit;
+ }
+ TDuration SetReschedulingDelay(TDuration delay);
+ void SetLogBackend(const TAutoPtr<TLogBackend> logBackend);
+ void SetLogPriority(NActors::NLog::EComponent component, NActors::NLog::EPriority priority);
+ TIntrusivePtr<ITimeProvider> GetTimeProvider();
+ TInstant GetCurrentTime() const;
+ void UpdateCurrentTime(TInstant newTime);
+ void AdvanceCurrentTime(TDuration duration);
+ void AddLocalService(const TActorId& actorId, const TActorSetupCmd& cmd, ui32 nodeIndex = 0);
+ virtual void Initialize();
+ ui32 GetNodeId(ui32 index = 0) const;
+ ui32 GetNodeCount() const;
+ ui64 AllocateLocalId();
+ ui32 InterconnectPoolId() const;
+ TString GetTempDir();
+ TActorId Register(IActor* actor, ui32 nodeIndex = 0, ui32 poolId = 0,
+ TMailboxType::EType mailboxType = TMailboxType::Simple, ui64 revolvingCounter = 0,
+ const TActorId& parentid = TActorId());
+ TActorId Register(IActor *actor, ui32 nodeIndex, ui32 poolId, TMailboxHeader *mailbox, ui32 hint,
+ const TActorId& parentid = TActorId());
+ TActorId RegisterService(const TActorId& serviceId, const TActorId& actorId, ui32 nodeIndex = 0);
+ TActorId AllocateEdgeActor(ui32 nodeIndex = 0);
+ TEventsList CaptureEvents();
+ TEventsList CaptureMailboxEvents(ui32 hint, ui32 nodeId);
+ TScheduledEventsList CaptureScheduledEvents();
+ void PushFront(TAutoPtr<IEventHandle>& ev);
+ void PushEventsFront(TEventsList& events);
+ void PushMailboxEventsFront(ui32 hint, ui32 nodeId, TEventsList& events);
+ // doesn't dispatch events for edge actors
+ bool DispatchEvents(const TDispatchOptions& options = TDispatchOptions());
+ bool DispatchEvents(const TDispatchOptions& options, TDuration simTimeout);
+ bool DispatchEvents(const TDispatchOptions& options, TInstant simDeadline);
+ void Send(IEventHandle* ev, ui32 senderNodeIndex = 0, bool viaActorSystem = false);
+ void Schedule(IEventHandle* ev, const TDuration& duration, ui32 nodeIndex = 0);
+ void ClearCounters();
+ ui64 GetCounter(ui32 evType) const;
+ TActorId GetLocalServiceId(const TActorId& serviceId, ui32 nodeIndex = 0);
+ void WaitForEdgeEvents(TEventFilter filter, const TSet<TActorId>& edgeFilter = {}, TDuration simTimeout = TDuration::Max());
+ TActorId GetInterconnectProxy(ui32 nodeIndexFrom, ui32 nodeIndexTo);
+ void BlockOutputForActor(const TActorId& actorId);
+ IActor* FindActor(const TActorId& actorId, ui32 nodeIndex = Max<ui32>()) const;
+ void EnableScheduleForActor(const TActorId& actorId, bool allow = true);
+ bool IsScheduleForActorEnabled(const TActorId& actorId) const;
+ TIntrusivePtr<NMonitoring::TDynamicCounters> GetDynamicCounters(ui32 nodeIndex = 0);
+ void SetupMonitoring();
+
+ template<typename T>
+ void AppendToLogSettings(NLog::EComponent minVal, NLog::EComponent maxVal, T func) {
+ Y_VERIFY(!IsInitialized);
+
+ for (const auto& pair : Nodes) {
+ pair.second->LogSettings->Append(minVal, maxVal, func);
+ }
+ }
+
+ TIntrusivePtr<NLog::TSettings> GetLogSettings(ui32 nodeIdx)
+ {
+ return Nodes[FirstNodeId + nodeIdx]->LogSettings;
+ }
+
+ TActorSystem* SingleSys() const;
+ TActorSystem* GetAnyNodeActorSystem();
+ TActorSystem* GetActorSystem(ui32 nodeId);
+ template <typename TEvent>
+ TEvent* GrabEdgeEventIf(TAutoPtr<IEventHandle>& handle, std::function<bool(const TEvent&)> predicate, TDuration simTimeout = TDuration::Max()) {
+ handle.Destroy();
+ const ui32 eventType = TEvent::EventType;
+ WaitForEdgeEvents([&](TTestActorRuntimeBase& runtime, TAutoPtr<IEventHandle>& event) {
+ Y_UNUSED(runtime);
+ if (event->GetTypeRewrite() != eventType)
+ return false;
+
+ TEvent* typedEvent = reinterpret_cast<TAutoPtr<TEventHandle<TEvent>>&>(event)->Get();
+ if (predicate(*typedEvent)) {
+ handle = event;
+ return true;
+ }
+
+ return false;
+ }, {}, simTimeout);
+
+ if (simTimeout == TDuration::Max())
+ Y_VERIFY(handle);
+
+ if (handle) {
+ return reinterpret_cast<TAutoPtr<TEventHandle<TEvent>>&>(handle)->Get();
+ } else {
+ return nullptr;
+ }
+ }
+
+ template<class TEvent>
+ typename TEvent::TPtr GrabEdgeEventIf(
+ const TSet<TActorId>& edgeFilter,
+ const std::function<bool(const typename TEvent::TPtr&)>& predicate,
+ TDuration simTimeout = TDuration::Max())
+ {
+ typename TEvent::TPtr handle;
+ const ui32 eventType = TEvent::EventType;
+ WaitForEdgeEvents([&](TTestActorRuntimeBase& runtime, TAutoPtr<IEventHandle>& event) {
+ Y_UNUSED(runtime);
+ if (event->GetTypeRewrite() != eventType)
+ return false;
+
+ typename TEvent::TPtr* typedEvent = reinterpret_cast<typename TEvent::TPtr*>(&event);
+ if (predicate(*typedEvent)) {
+ handle = *typedEvent;
+ return true;
+ }
+
+ return false;
+ }, edgeFilter, simTimeout);
+
+ if (simTimeout == TDuration::Max())
+ Y_VERIFY(handle);
+
+ return handle;
+ }
+
+ template<class TEvent>
+ typename TEvent::TPtr GrabEdgeEventIf(
+ const TActorId& edgeActor,
+ const std::function<bool(const typename TEvent::TPtr&)>& predicate,
+ TDuration simTimeout = TDuration::Max())
+ {
+ TSet<TActorId> edgeFilter{edgeActor};
+ return GrabEdgeEventIf<TEvent>(edgeFilter, predicate, simTimeout);
+ }
+
+ template <typename TEvent>
+ TEvent* GrabEdgeEvent(TAutoPtr<IEventHandle>& handle, TDuration simTimeout = TDuration::Max()) {
+ std::function<bool(const TEvent&)> truth = [](const TEvent&) { return true; };
+ return GrabEdgeEventIf(handle, truth, simTimeout);
+ }
+
+ template <typename TEvent>
+ THolder<TEvent> GrabEdgeEvent(TDuration simTimeout = TDuration::Max()) {
+ TAutoPtr<IEventHandle> handle;
+ std::function<bool(const TEvent&)> truth = [](const TEvent&) { return true; };
+ GrabEdgeEventIf(handle, truth, simTimeout);
+ return THolder(handle ? handle->Release<TEvent>().Release() : nullptr);
+ }
+
+ template<class TEvent>
+ typename TEvent::TPtr GrabEdgeEvent(const TSet<TActorId>& edgeFilter, TDuration simTimeout = TDuration::Max()) {
+ return GrabEdgeEventIf<TEvent>(edgeFilter, [](const typename TEvent::TPtr&) { return true; }, simTimeout);
+ }
+
+ template<class TEvent>
+ typename TEvent::TPtr GrabEdgeEvent(const TActorId& edgeActor, TDuration simTimeout = TDuration::Max()) {
+ TSet<TActorId> edgeFilter{edgeActor};
+ return GrabEdgeEvent<TEvent>(edgeFilter, simTimeout);
+ }
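+
+ // A minimal usage sketch (illustrative; the target actor and the
+ // TEvExample/TEvExampleReply event types are hypothetical):
+ //
+ //   TActorId edge = runtime.AllocateEdgeActor();
+ //   runtime.Send(new IEventHandle(target, edge, new TEvExample()));
+ //   auto reply = runtime.GrabEdgeEvent<TEvExampleReply>(edge);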
+
+ // replace with std::variant<>
+ template <typename... TEvents>
+ std::tuple<TEvents*...> GrabEdgeEvents(TAutoPtr<IEventHandle>& handle, TDuration simTimeout = TDuration::Max()) {
+ handle.Destroy();
+ auto eventTypes = { TEvents::EventType... };
+ WaitForEdgeEvents([&](TTestActorRuntimeBase&, TAutoPtr<IEventHandle>& event) {
+ if (std::find(std::begin(eventTypes), std::end(eventTypes), event->GetTypeRewrite()) == std::end(eventTypes))
+ return false;
+ handle = event;
+ return true;
+ }, {}, simTimeout);
+ if (simTimeout == TDuration::Max())
+ Y_VERIFY(handle);
+ if (handle) {
+ return std::make_tuple(handle->Type == TEvents::EventType
+ ? reinterpret_cast<TAutoPtr<TEventHandle<TEvents>>&>(handle)->Get()
+ : static_cast<TEvents*>(nullptr)...);
+ }
+ return {};
+ }
+
+ template <typename TEvent>
+ TEvent* GrabEdgeEventRethrow(TAutoPtr<IEventHandle>& handle, TDuration simTimeout = TDuration::Max()) {
+ try {
+ return GrabEdgeEvent<TEvent>(handle, simTimeout);
+ } catch (...) {
+ ythrow TWithBackTrace<yexception>() << "Exception occured while waiting for " << TypeName<TEvent>() << ": " << CurrentExceptionMessage();
+ }
+ }
+
+ template<class TEvent>
+ typename TEvent::TPtr GrabEdgeEventRethrow(const TSet<TActorId>& edgeFilter, TDuration simTimeout = TDuration::Max()) {
+ try {
+ return GrabEdgeEvent<TEvent>(edgeFilter, simTimeout);
+ } catch (...) {
+ ythrow TWithBackTrace<yexception>() << "Exception occured while waiting for " << TypeName<TEvent>() << ": " << CurrentExceptionMessage();
+ }
+ }
+
+ template<class TEvent>
+ typename TEvent::TPtr GrabEdgeEventRethrow(const TActorId& edgeActor, TDuration simTimeout = TDuration::Max()) {
+ try {
+ return GrabEdgeEvent<TEvent>(edgeActor, simTimeout);
+ } catch (...) {
+ ythrow TWithBackTrace<yexception>() << "Exception occured while waiting for " << TypeName<TEvent>() << ": " << CurrentExceptionMessage();
+ }
+ }
+
+ template <typename... TEvents>
+ static TString TypeNames() {
+ static TString names[] = { TypeName<TEvents>()... };
+ TString result;
+ for (const TString& s : names) {
+ if (result.empty()) {
+ result += '<';
+ } else {
+ result += ',';
+ }
+ result += s;
+ }
+ if (!result.empty()) {
+ result += '>';
+ }
+ return result;
+ }
+
+ template <typename... TEvents>
+ std::tuple<TEvents*...> GrabEdgeEventsRethrow(TAutoPtr<IEventHandle>& handle, TDuration simTimeout = TDuration::Max()) {
+ try {
+ return GrabEdgeEvents<TEvents...>(handle, simTimeout);
+ } catch (...) {
+ ythrow TWithBackTrace<yexception>() << "Exception occured while waiting for " << TypeNames<TEvents...>() << ": " << CurrentExceptionMessage();
+ }
+ }
+
+ void ResetScheduledCount() {
+ ScheduledCount = 0;
+ }
+
+ void SetScheduledLimit(ui64 limit) {
+ ScheduledLimit = limit;
+ }
+
+ void SetDispatcherRandomSeed(TInstant time, ui64 iteration);
+ TString GetActorName(const TActorId& actorId) const;
+
+ const TVector<ui64>& GetTxAllocatorTabletIds() const { return TxAllocatorTabletIds; }
+ void SetTxAllocatorTabletIds(const TVector<ui64>& ids) { TxAllocatorTabletIds = ids; }
+
+ void SetUseRealInterconnect() {
+ UseRealInterconnect = true;
+ }
+
+ protected:
+ struct TNodeDataBase;
+ TNodeDataBase* GetRawNode(ui32 node) const {
+ return Nodes.at(FirstNodeId + node).Get();
+ }
+
+ static IExecutorPool* CreateExecutorPoolStub(TTestActorRuntimeBase* runtime, ui32 nodeIndex, TNodeDataBase* node, ui32 poolId);
+ virtual TIntrusivePtr<NMonitoring::TDynamicCounters> GetCountersForComponent(TIntrusivePtr<NMonitoring::TDynamicCounters> counters, const char* component) {
+ Y_UNUSED(counters);
+ Y_UNUSED(component);
+
+ // do nothing, just return the existing counters
+ return counters;
+ }
+
+ THolder<TActorSystemSetup> MakeActorSystemSetup(ui32 nodeIndex, TNodeDataBase* node);
+ THolder<TActorSystem> MakeActorSystem(ui32 nodeIndex, TNodeDataBase* node);
+ virtual void InitActorSystemSetup(TActorSystemSetup& setup) {
+ Y_UNUSED(setup);
+ }
+
+ private:
+ IActor* FindActor(const TActorId& actorId, TNodeDataBase* node) const;
+ void SendInternal(IEventHandle* ev, ui32 nodeIndex, bool viaActorSystem);
+ TEventMailBox& GetMailbox(ui32 nodeId, ui32 hint);
+ void ClearMailbox(ui32 nodeId, ui32 hint);
+ void HandleNonEmptyMailboxesForEachContext(TEventMailboxId mboxId);
+ void UpdateFinalEventsStatsForEachContext(IEventHandle& ev);
+ bool DispatchEventsInternal(const TDispatchOptions& options, TInstant simDeadline);
+
+ private:
+ ui64 ScheduledCount;
+ ui64 ScheduledLimit;
+ THolder<TTempDir> TmpDir;
+ const TThread::TId MainThreadId;
+
+ protected:
+ bool UseRealInterconnect = false;
+ TInterconnectMock InterconnectMock;
+ bool IsInitialized = false;
+ bool SingleSysEnv = false;
+ const TString ClusterUUID;
+ const ui32 FirstNodeId;
+ const ui32 NodeCount;
+ const ui32 DataCenterCount;
+ const bool UseRealThreads;
+
+ ui64 LocalId;
+ TMutex Mutex;
+ TCondVar MailboxesHasEvents;
+ TEventMailBoxList Mailboxes;
+ TMap<ui32, ui64> EvCounters;
+ ui64 DispatchCyclesCount;
+ ui64 DispatchedEventsCount;
+ ui64 DispatchedEventsLimit = 2'500'000;
+ TActorId CurrentRecipient;
+ ui64 DispatcherRandomSeed;
+ TIntrusivePtr<IRandomProvider> DispatcherRandomProvider;
+ TAutoPtr<TLogBackend> LogBackend;
+ bool NeedMonitoring;
+
+ TIntrusivePtr<IRandomProvider> RandomProvider;
+ TIntrusivePtr<ITimeProvider> TimeProvider;
+
+ protected:
+ struct TNodeDataBase: public TThrRefBase {
+ TNodeDataBase();
+ void Stop();
+ virtual ~TNodeDataBase();
+ virtual ui64 GetLoggerPoolId() const {
+ return 0;
+ }
+
+ template <typename T = void>
+ T* GetAppData() {
+ return static_cast<T*>(AppData0.get());
+ }
+
+ template <typename T = void>
+ const T* GetAppData() const {
+ return static_cast<T*>(AppData0.get());
+ }
+
+ TIntrusivePtr<NMonitoring::TDynamicCounters> DynamicCounters;
+ TIntrusivePtr<NActors::NLog::TSettings> LogSettings;
+ TIntrusivePtr<NInterconnect::TPollerThreads> Poller;
+ volatile ui64* ActorSystemTimestamp;
+ volatile ui64* ActorSystemMonotonic;
+ TVector<std::pair<TActorId, TActorSetupCmd> > LocalServices;
+ TMap<TActorId, IActor*> LocalServicesActors;
+ TMap<IActor*, TActorId> ActorToActorId;
+ THolder<TMailboxTable> MailboxTable;
+ std::shared_ptr<void> AppData0;
+ THolder<TActorSystem> ActorSystem;
+ THolder<IExecutorPool> SchedulerPool;
+ TVector<IExecutorPool*> ExecutorPools;
+ THolder<TExecutorThread> ExecutorThread;
+ };
+
+ struct INodeFactory {
+ virtual ~INodeFactory() = default;
+ virtual TIntrusivePtr<TNodeDataBase> CreateNode() = 0;
+ };
+
+ struct TDefaultNodeFactory final: INodeFactory {
+ virtual TIntrusivePtr<TNodeDataBase> CreateNode() override {
+ return new TNodeDataBase();
+ }
+ };
+
+ INodeFactory& GetNodeFactory() {
+ return *NodeFactory;
+ }
+
+ virtual TNodeDataBase* GetNodeById(size_t idx) {
+ return Nodes[idx].Get();
+ }
+
+ void InitNodes();
+ void CleanupNodes();
+ virtual void InitNodeImpl(TNodeDataBase*, size_t);
+
+ static bool AllowSendFrom(TNodeDataBase* node, TAutoPtr<IEventHandle>& ev);
+
+ protected:
+ THolder<INodeFactory> NodeFactory{new TDefaultNodeFactory};
+
+ private:
+ void InitNode(TNodeDataBase* node, size_t idx);
+
+ struct TDispatchContext {
+ const TDispatchOptions* Options;
+ TDispatchContext* PrevContext;
+
+ TMap<const TDispatchOptions::TFinalEventCondition*, ui32> FinalEventFrequency;
+ TSet<TEventMailboxId> FoundNonEmptyMailboxes;
+ bool FinalEventFound = false;
+ };
+
+ TProgramShouldContinue ShouldContinue;
+ TMap<ui32, TIntrusivePtr<TNodeDataBase>> Nodes;
+ ui64 CurrentTimestamp;
+ TSet<TActorId> EdgeActors;
+ THashMap<TEventMailboxId, TActorId, TEventMailboxId::THash> EdgeActorByMailbox;
+ TDuration DispatchTimeout;
+ TDuration ReschedulingDelay;
+ TEventObserver ObserverFunc;
+ TScheduledEventsSelector ScheduledEventsSelectorFunc;
+ TEventFilter EventFilterFunc;
+ TScheduledEventFilter ScheduledEventFilterFunc;
+ TRegistrationObserver RegistrationObserver;
+ TSet<TActorId> BlockedOutput;
+ TSet<TActorId> ScheduleWhiteList;
+ THashMap<TActorId, TActorId> ScheduleWhiteListParent;
+ THashMap<TActorId, TString> ActorNames;
+ TDispatchContext* CurrentDispatchContext;
+ TVector<ui64> TxAllocatorTabletIds;
+
+ static ui32 NextNodeId;
+ };
+
+ template <typename TEvent>
+ TEvent* FindEvent(TEventsList& events) {
+ for (auto& event : events) {
+ if (event && event->GetTypeRewrite() == TEvent::EventType) {
+ return static_cast<TEvent*>(event->GetBase());
+ }
+ }
+
+ return nullptr;
+ }
+
+ template <typename TEvent>
+ TEvent* FindEvent(TEventsList& events, const std::function<bool(const TEvent&)>& predicate) {
+ for (auto& event : events) {
+ if (event && event->GetTypeRewrite() == TEvent::EventType && predicate(*static_cast<TEvent*>(event->GetBase()))) {
+ return static_cast<TEvent*>(event->GetBase());
+ }
+ }
+
+ return nullptr;
+ }
+
+ template <typename TEvent>
+ TEvent* GrabEvent(TEventsList& events, TAutoPtr<IEventHandle>& ev) {
+ ev.Destroy();
+ for (auto& event : events) {
+ if (event && event->GetTypeRewrite() == TEvent::EventType) {
+ ev = event;
+ return static_cast<TEvent*>(ev->GetBase());
+ }
+ }
+
+ return nullptr;
+ }
+
+ template <typename TEvent>
+ TEvent* GrabEvent(TEventsList& events, TAutoPtr<IEventHandle>& ev,
+ const std::function<bool(const typename TEvent::TPtr&)>& predicate) {
+ ev.Destroy();
+ for (auto& event : events) {
+ if (event && event->GetTypeRewrite() == TEvent::EventType) {
+ if (predicate(reinterpret_cast<const typename TEvent::TPtr&>(event))) {
+ ev = event;
+ return static_cast<TEvent*>(ev->GetBase());
+ }
+ }
+ }
+
+ return nullptr;
+ }
+
+ class IStrandingDecoratorFactory {
+ public:
+ virtual ~IStrandingDecoratorFactory() {}
+ virtual IActor* Wrap(const TActorId& delegatee, bool isSync, const TVector<TActorId>& additionalActors) = 0;
+ };
+
+ struct IReplyChecker {
+ virtual ~IReplyChecker() {}
+ virtual void OnRequest(IEventHandle *request) = 0;
+ virtual bool IsWaitingForMoreResponses(IEventHandle *response) = 0;
+ };
+
+ struct TNoneReplyChecker : IReplyChecker {
+ void OnRequest(IEventHandle*) override {
+ }
+
+ bool IsWaitingForMoreResponses(IEventHandle*) override {
+ return false;
+ }
+ };
+
+ using TReplyCheckerCreator = std::function<THolder<IReplyChecker>(void)>;
+
+ inline THolder<IReplyChecker> CreateNoneReplyChecker() {
+ return MakeHolder<TNoneReplyChecker>();
+ }
+
+ TAutoPtr<IStrandingDecoratorFactory> CreateStrandingDecoratorFactory(TTestActorRuntimeBase* runtime,
+ TReplyCheckerCreator createReplyChecker = CreateNoneReplyChecker);
+ extern ui64 DefaultRandomSeed;
+}
diff --git a/library/cpp/actors/testlib/ut/ya.make b/library/cpp/actors/testlib/ut/ya.make
new file mode 100644
index 0000000000..1d4aec06ff
--- /dev/null
+++ b/library/cpp/actors/testlib/ut/ya.make
@@ -0,0 +1,20 @@
+UNITTEST_FOR(library/cpp/actors/testlib)
+
+OWNER(
+ kruall
+ g:kikimr
+)
+
+FORK_SUBTESTS()
+SIZE(SMALL)
+
+
+PEERDIR(
+ library/cpp/actors/core
+)
+
+SRCS(
+ decorator_ut.cpp
+)
+
+END()
diff --git a/library/cpp/actors/testlib/ya.make b/library/cpp/actors/testlib/ya.make
new file mode 100644
index 0000000000..1afb3f6059
--- /dev/null
+++ b/library/cpp/actors/testlib/ya.make
@@ -0,0 +1,27 @@
+LIBRARY()
+
+OWNER(
+ g:kikimr
+)
+
+SRCS(
+ test_runtime.cpp
+)
+
+PEERDIR(
+ library/cpp/actors/core
+ library/cpp/actors/interconnect/mock
+ library/cpp/actors/protos
+ library/cpp/random_provider
+ library/cpp/time_provider
+)
+
+IF (GCC)
+ CFLAGS(-fno-devirtualize-speculatively)
+ENDIF()
+
+END()
+
+RECURSE_FOR_TESTS(
+ ut
+)
diff --git a/library/cpp/actors/util/affinity.cpp b/library/cpp/actors/util/affinity.cpp
new file mode 100644
index 0000000000..cc1b6e70ec
--- /dev/null
+++ b/library/cpp/actors/util/affinity.cpp
@@ -0,0 +1,93 @@
+#include "affinity.h"
+
+#ifdef _linux_
+#include <sched.h>
+#endif
+
+class TAffinity::TImpl {
+#ifdef _linux_
+ cpu_set_t Mask;
+#endif
+public:
+ TImpl() {
+#ifdef _linux_
+ int ar = sched_getaffinity(0, sizeof(cpu_set_t), &Mask);
+ Y_VERIFY_DEBUG(ar == 0);
+#endif
+ }
+
+ explicit TImpl(const ui8* cpus, ui32 size) {
+#ifdef _linux_
+ CPU_ZERO(&Mask);
+ for (ui32 i = 0; i != size; ++i) {
+ if (cpus[i]) {
+ CPU_SET(i, &Mask);
+ }
+ }
+#else
+ Y_UNUSED(cpus);
+ Y_UNUSED(size);
+#endif
+ }
+
+ void Set() const {
+#ifdef _linux_
+ int ar = sched_setaffinity(0, sizeof(cpu_set_t), &Mask);
+ Y_VERIFY_DEBUG(ar == 0);
+#endif
+ }
+
+ operator TCpuMask() const {
+ TCpuMask result;
+#ifdef _linux_
+ for (ui32 i = 0; i != CPU_SETSIZE; ++i) {
+ result.Cpus.emplace_back(CPU_ISSET(i, &Mask));
+ }
+ result.RemoveTrailingZeros();
+#endif
+ return result;
+ }
+
+};
+
+TAffinity::TAffinity() {
+}
+
+TAffinity::~TAffinity() {
+}
+
+TAffinity::TAffinity(const ui8* x, ui32 sz) {
+ if (x && sz) {
+ Impl.Reset(new TImpl(x, sz));
+ }
+}
+
+TAffinity::TAffinity(const TCpuMask& mask) {
+ if (!mask.IsEmpty()) {
+ static_assert(sizeof(ui8) == sizeof(mask.Cpus[0]));
+ const ui8* x = reinterpret_cast<const ui8*>(&mask.Cpus[0]);
+ const ui32 sz = mask.Size();
+ Impl.Reset(new TImpl(x, sz));
+ }
+}
+
+void TAffinity::Current() {
+ Impl.Reset(new TImpl());
+}
+
+void TAffinity::Set() const {
+ if (!!Impl) {
+ Impl->Set();
+ }
+}
+
+bool TAffinity::Empty() const {
+ return !Impl;
+}
+
+TAffinity::operator TCpuMask() const {
+ if (!!Impl) {
+ return *Impl;
+ }
+ return TCpuMask();
+}
diff --git a/library/cpp/actors/util/affinity.h b/library/cpp/actors/util/affinity.h
new file mode 100644
index 0000000000..ae106ed180
--- /dev/null
+++ b/library/cpp/actors/util/affinity.h
@@ -0,0 +1,49 @@
+#pragma once
+
+#include "defs.h"
+#include "cpumask.h"
+
+// Platform-specific class to set or get thread affinity
+class TAffinity: public TThrRefBase, TNonCopyable {
+ class TImpl;
+ THolder<TImpl> Impl;
+
+public:
+ TAffinity();
+ TAffinity(const ui8* cpus, ui32 size);
+ explicit TAffinity(const TCpuMask& mask);
+ ~TAffinity();
+
+ void Current();
+ void Set() const;
+ bool Empty() const;
+
+ operator TCpuMask() const;
+};
+
+// Scoped affinity setter
+class TAffinityGuard : TNonCopyable {
+ bool Stacked;
+ TAffinity OldAffinity;
+
+public:
+ TAffinityGuard(const TAffinity* affinity) {
+ Stacked = false;
+ if (affinity && !affinity->Empty()) {
+ OldAffinity.Current();
+ affinity->Set();
+ Stacked = true;
+ }
+ }
+
+ ~TAffinityGuard() {
+ Release();
+ }
+
+ void Release() {
+ if (Stacked) {
+ OldAffinity.Set();
+ Stacked = false;
+ }
+ }
+};
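+
+// A minimal usage sketch (illustrative): pin the current thread to CPUs 0..3
+// for the duration of a scope; the previous affinity is restored on exit.
+//
+//   TAffinity affinity(TCpuMask("0-3"));
+//   {
+//       TAffinityGuard guard(&affinity);
+//       // ... work pinned to CPUs 0..3 ...
+//   } // previous affinity restored here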
diff --git a/library/cpp/actors/util/cpumask.h b/library/cpp/actors/util/cpumask.h
new file mode 100644
index 0000000000..29741aa1d6
--- /dev/null
+++ b/library/cpp/actors/util/cpumask.h
@@ -0,0 +1,133 @@
+#pragma once
+
+#include "defs.h"
+
+#include <library/cpp/containers/stack_vector/stack_vec.h>
+
+#include <util/string/split.h>
+#include <util/generic/yexception.h>
+
+using TCpuId = ui32;
+
+// Simple data structure for operating on a set of CPUs
+struct TCpuMask {
+ TStackVec<bool, 1024> Cpus;
+
+ // Creates empty mask
+ TCpuMask() {}
+
+ // Creates mask with single cpu set
+ explicit TCpuMask(TCpuId cpuId) {
+ Set(cpuId);
+ }
+
+ // Initialize mask from raw boolean array
+ template <class T>
+ TCpuMask(const T* cpus, TCpuId size) {
+ Cpus.reserve(size);
+ for (TCpuId i = 0; i != size; ++i) {
+ Cpus.emplace_back(bool(cpus[i]));
+ }
+ }
+
+ // Parse a numerical list of processors. The numbers are separated by commas and may include ranges. For example: 0,5,7,9-11
+ explicit TCpuMask(const TString& cpuList) {
+ try {
+ for (TStringBuf s : StringSplitter(cpuList).Split(',')) {
+ TCpuId l, r;
+ if (s.find('-') != TString::npos) {
+ StringSplitter(s).Split('-').CollectInto(&l, &r);
+ } else {
+ l = r = FromString<TCpuId>(s);
+ }
+ if (r >= Cpus.size()) {
+ Cpus.resize(r + 1, false);
+ }
+ for (TCpuId cpu = l; cpu <= r; cpu++) {
+ Cpus[cpu] = true;
+ }
+ }
+ } catch (...) {
+ ythrow TWithBackTrace<yexception>() << "Exception occured while parsing cpu list '" << cpuList << "': " << CurrentExceptionMessage();
+ }
+ }
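+
+ // Worked example: TCpuMask("0,5,7,9-11") sets exactly CPUs {0, 5, 7, 9, 10, 11}.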
+
+ // Returns size of underlying vector
+ TCpuId Size() const {
+ return Cpus.size();
+ }
+
+ // Returns number of set bits in mask
+ TCpuId CpuCount() const {
+ TCpuId result = 0;
+ for (bool value : Cpus) {
+ result += value;
+ }
+ return result;
+ }
+
+ bool IsEmpty() const {
+ for (bool value : Cpus) {
+ if (value) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ bool IsSet(TCpuId cpu) const {
+ return cpu < Cpus.size() && Cpus[cpu];
+ }
+
+ void Set(TCpuId cpu) {
+ if (cpu >= Cpus.size()) {
+ Cpus.resize(cpu + 1, false);
+ }
+ Cpus[cpu] = true;
+ }
+
+ void Reset(TCpuId cpu) {
+ if (cpu < Cpus.size()) {
+ Cpus[cpu] = false;
+ }
+ }
+
+ void RemoveTrailingZeros() {
+ while (!Cpus.empty() && !Cpus.back()) {
+ Cpus.pop_back();
+ }
+ }
+
+ explicit operator bool() const {
+ return !IsEmpty();
+ }
+
+ TCpuMask operator &(const TCpuMask& rhs) const {
+ TCpuMask result;
+ TCpuId size = Max(Size(), rhs.Size());
+ result.Cpus.reserve(size);
+ for (TCpuId cpu = 0; cpu < size; cpu++) {
+ result.Cpus.emplace_back(IsSet(cpu) && rhs.IsSet(cpu));
+ }
+ return result;
+ }
+
+ TCpuMask operator |(const TCpuMask& rhs) const {
+ TCpuMask result;
+ TCpuId size = Max(Size(), rhs.Size());
+ result.Cpus.reserve(size);
+ for (TCpuId cpu = 0; cpu < size; cpu++) {
+ result.Cpus.emplace_back(IsSet(cpu) || rhs.IsSet(cpu));
+ }
+ return result;
+ }
+
+ TCpuMask operator -(const TCpuMask& rhs) const {
+ TCpuMask result;
+ result.Cpus.reserve(Size());
+ for (TCpuId cpu = 0; cpu < Size(); cpu++) {
+ result.Cpus.emplace_back(IsSet(cpu) && !rhs.IsSet(cpu));
+ }
+ return result;
+ }
+};
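+
+// A minimal usage sketch (illustrative): masks combine with set operations.
+//
+//   TCpuMask a("0-3"), b("2-5");
+//   TCpuMask common = a & b; // {2, 3}
+//   TCpuMask all = a | b;    // {0, 1, 2, 3, 4, 5}
+//   TCpuMask onlyA = a - b;  // {0, 1}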
diff --git a/library/cpp/actors/util/datetime.h b/library/cpp/actors/util/datetime.h
new file mode 100644
index 0000000000..cbec5965d6
--- /dev/null
+++ b/library/cpp/actors/util/datetime.h
@@ -0,0 +1,82 @@
+#pragma once
+
+#include <util/system/defaults.h>
+#include <util/system/hp_timer.h>
+#include <util/system/platform.h>
+
+#if defined(_win_)
+#include <intrin.h>
+#pragma intrinsic(__rdtsc)
+#endif // _win_
+
+#if defined(_darwin_) && !defined(_x86_)
+#include <mach/mach_time.h>
+#endif
+
+// GetCycleCount() from util/system/datetime.h uses rdtscp, which is more accurate than rdtsc,
+// but rdtscp partially serializes execution (it waits for preceding instructions to complete),
+// so it can be slower
+Y_FORCE_INLINE ui64 GetCycleCountFast() {
+#if defined(_MSC_VER)
+ // Generates the rdtsc instruction, which returns the processor time stamp.
+ // The processor time stamp records the number of clock cycles since the last reset.
+ return __rdtsc();
+#elif defined(__clang__) && !defined(_arm64_)
+ return __builtin_readcyclecounter();
+#elif defined(_x86_64_)
+ unsigned hi, lo;
+ __asm__ __volatile__("rdtsc"
+ : "=a"(lo), "=d"(hi));
+ return ((unsigned long long)lo) | (((unsigned long long)hi) << 32);
+#elif defined(_i386_)
+ ui64 x;
+ __asm__ volatile("rdtsc\n\t"
+ : "=A"(x));
+ return x;
+#elif defined(_darwin_)
+ return mach_absolute_time();
+#elif defined(_arm32_)
+ return MicroSeconds();
+#elif defined(_arm64_)
+ ui64 x;
+
+ __asm__ __volatile__("isb; mrs %0, cntvct_el0"
+ : "=r"(x));
+
+ return x;
+#else
+#error "unsupported arch"
+#endif
+}
+
+// NHPTimer::GetTime fast analog
+Y_FORCE_INLINE void GetTimeFast(NHPTimer::STime* pTime) noexcept {
+ *pTime = GetCycleCountFast();
+}
+
+namespace NActors {
+ inline double Ts2Ns(ui64 ts) {
+ return NHPTimer::GetSeconds(ts) * 1e9;
+ }
+
+ inline double Ts2Us(ui64 ts) {
+ return NHPTimer::GetSeconds(ts) * 1e6;
+ }
+
+ inline double Ts2Ms(ui64 ts) {
+ return NHPTimer::GetSeconds(ts) * 1e3;
+ }
+
+ inline ui64 Us2Ts(double us) {
+ return ui64(NHPTimer::GetClockRate() * us / 1e6);
+ }
+
+ struct TTimeTracker {
+ ui64 Ts;
+ TTimeTracker(): Ts(GetCycleCountFast()) {}
+ ui64 Elapsed() {
+ ui64 ts = GetCycleCountFast();
+ std::swap(Ts, ts);
+ return Ts - ts;
+ }
+ };
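+
+ // A minimal usage sketch (illustrative; DoWork is hypothetical): time a
+ // section in cycles and convert to microseconds with the helpers above.
+ //
+ //   TTimeTracker timer;
+ //   DoWork();
+ //   double us = Ts2Us(timer.Elapsed());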
+}
diff --git a/library/cpp/actors/util/defs.h b/library/cpp/actors/util/defs.h
new file mode 100644
index 0000000000..5c3b57665b
--- /dev/null
+++ b/library/cpp/actors/util/defs.h
@@ -0,0 +1,16 @@
+#pragma once
+
+// unique tag to fix pragma once gcc gluing: ./library/cpp/actors/util/defs.h
+
+#include <util/system/defaults.h>
+#include <util/generic/bt_exception.h>
+#include <util/generic/noncopyable.h>
+#include <util/generic/ptr.h>
+#include <util/generic/string.h>
+#include <util/generic/yexception.h>
+#include <util/system/atomic.h>
+#include <util/system/align.h>
+#include <util/generic/vector.h>
+#include <util/datetime/base.h>
+#include <util/generic/ylimits.h>
+#include "intrinsics.h"
diff --git a/library/cpp/actors/util/funnel_queue.h b/library/cpp/actors/util/funnel_queue.h
new file mode 100644
index 0000000000..0e21e2617c
--- /dev/null
+++ b/library/cpp/actors/util/funnel_queue.h
@@ -0,0 +1,240 @@
+#pragma once
+
+#include <util/system/atomic.h>
+#include <util/generic/noncopyable.h>
+
+template <typename ElementType>
+class TFunnelQueue: private TNonCopyable {
+public:
+ TFunnelQueue() noexcept
+ : Front(nullptr)
+ , Back(nullptr)
+ {
+ }
+
+ virtual ~TFunnelQueue() noexcept {
+ for (auto entry = Front; entry; entry = DeleteEntry(entry))
+ continue;
+ }
+
+ /// Push an element. Can be used from many threads. Returns true if this is the first element.
+ bool
+ Push(ElementType&& element) noexcept {
+ TEntry* const next = NewEntry(static_cast<ElementType&&>(element));
+ TEntry* const prev = AtomicSwap(&Back, next);
+ AtomicSet(prev ? prev->Next : Front, next);
+ return !prev;
+ }
+
+ /// Extract the top element. Must be used from only one thread. Returns true if more elements remain.
+ bool
+ Pop() noexcept {
+ if (TEntry* const top = AtomicGet(Front)) {
+ const auto last = AtomicCas(&Back, nullptr, top);
+ if (last) // This was the last element; the queue is empty now.
+ AtomicCas(&Front, nullptr, top);
+ else // This element is not the last one.
+ for (;;) {
+ if (const auto next = AtomicGet(top->Next)) {
+ AtomicSet(Front, next);
+ break;
+ }
+ // Next is still null: spin until the producer publishes it.
+ }
+
+ DeleteEntry(top);
+ return !last;
+ }
+
+ return false;
+ }
+
+ /// Peek at the top element. Must be used from only one thread.
+ ElementType&
+ Top() const noexcept {
+ return AtomicGet(Front)->Data;
+ }
+
+ bool
+ IsEmpty() const noexcept {
+ return !AtomicGet(Front);
+ }
+
+protected:
+ class TEntry: private TNonCopyable {
+ friend class TFunnelQueue;
+
+ private:
+ explicit TEntry(ElementType&& element) noexcept
+ : Data(static_cast<ElementType&&>(element))
+ , Next(nullptr)
+ {
+ }
+
+ ~TEntry() noexcept {
+ }
+
+ public:
+ ElementType Data;
+ TEntry* volatile Next;
+ };
+
+ TEntry* volatile Front;
+ TEntry* volatile Back;
+
+ virtual TEntry* NewEntry(ElementType&& element) noexcept {
+ return new TEntry(static_cast<ElementType&&>(element));
+ }
+
+ virtual TEntry* DeleteEntry(TEntry* entry) noexcept {
+ const auto next = entry->Next;
+ delete entry;
+ return next;
+ }
+
+protected:
+ struct TEntryIter {
+ TEntry* ptr;
+
+ ElementType& operator*() {
+ return ptr->Data;
+ }
+
+ ElementType* operator->() {
+ return &ptr->Data;
+ }
+
+ TEntryIter& operator++() {
+ ptr = AtomicGet(ptr->Next);
+ return *this;
+ }
+
+ bool operator!=(const TEntryIter& other) const {
+ return ptr != other.ptr;
+ }
+
+ bool operator==(const TEntryIter& other) const {
+ return ptr == other.ptr;
+ }
+ };
+
+ struct TConstEntryIter {
+ const TEntry* ptr;
+
+ const ElementType& operator*() {
+ return ptr->Data;
+ }
+
+ const ElementType* operator->() {
+ return &ptr->Data;
+ }
+
+ TConstEntryIter& operator++() {
+ ptr = AtomicGet(ptr->Next);
+ return *this;
+ }
+
+ bool operator!=(const TConstEntryIter& other) const {
+ return ptr != other.ptr;
+ }
+
+ bool operator==(const TConstEntryIter& other) const {
+ return ptr == other.ptr;
+ }
+ };
+
+public:
+ using const_iterator = TConstEntryIter;
+ using iterator = TEntryIter;
+
+ iterator begin() {
+ return {AtomicGet(Front)};
+ }
+ const_iterator cbegin() {
+ return {AtomicGet(Front)};
+ }
+ const_iterator begin() const {
+ return {AtomicGet(Front)};
+ }
+
+ iterator end() {
+ return {nullptr};
+ }
+ const_iterator cend() {
+ return {nullptr};
+ }
+ const_iterator end() const {
+ return {nullptr};
+ }
+};
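+
+// A minimal usage sketch (illustrative): any number of producers may Push,
+// while a single consumer peeks with Top and drains with Pop.
+//
+//   TFunnelQueue<int> queue;
+//   bool wasFirst = queue.Push(42); // true if the queue was empty before
+//   while (!queue.IsEmpty()) {
+//       int value = queue.Top();    // consumer thread only
+//       queue.Pop();                // returns true while more elements remain
+//   }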
+
+template <typename ElementType>
+class TPooledFunnelQueue: public TFunnelQueue<ElementType> {
+public:
+ TPooledFunnelQueue() noexcept
+ : Stack(nullptr)
+ {
+ }
+
+ virtual ~TPooledFunnelQueue() noexcept override {
+ for (auto entry = TBase::Front; entry; entry = TBase::DeleteEntry(entry))
+ continue;
+ for (auto entry = Stack; entry; entry = TBase::DeleteEntry(entry))
+ continue;
+ TBase::Back = TBase::Front = Stack = nullptr;
+ }
+
+private:
+ typedef TFunnelQueue<ElementType> TBase;
+
+ typename TBase::TEntry* volatile Stack;
+
+protected:
+ virtual typename TBase::TEntry* NewEntry(ElementType&& element) noexcept override {
+ while (const auto top = AtomicGet(Stack))
+ if (AtomicCas(&Stack, top->Next, top)) {
+ top->Data = static_cast<ElementType&&>(element);
+ AtomicSet(top->Next, nullptr);
+ return top;
+ }
+
+ return TBase::NewEntry(static_cast<ElementType&&>(element));
+ }
+
+ virtual typename TBase::TEntry* DeleteEntry(typename TBase::TEntry* entry) noexcept override {
+ entry->Data = ElementType();
+ const auto next = entry->Next;
+ do
+ AtomicSet(entry->Next, AtomicGet(Stack));
+ while (!AtomicCas(&Stack, entry, entry->Next));
+ return next;
+ }
+};
+
+template <typename ElementType, template <typename T> class TQueueType = TFunnelQueue>
+class TCountedFunnelQueue: public TQueueType<ElementType> {
+public:
+ TCountedFunnelQueue() noexcept
+ : Count(0)
+ {
+ }
+
+ TAtomicBase GetSize() const noexcept {
+ return AtomicGet(Count);
+ }
+
+private:
+ typedef TQueueType<ElementType> TBase;
+
+ virtual typename TBase::TEntry* NewEntry(ElementType&& element) noexcept override {
+ AtomicAdd(Count, 1);
+ return TBase::NewEntry(static_cast<ElementType&&>(element));
+ }
+
+ virtual typename TBase::TEntry* DeleteEntry(typename TBase::TEntry* entry) noexcept override {
+ AtomicSub(Count, 1);
+ return TBase::DeleteEntry(entry);
+ }
+
+ TAtomic Count;
+};
diff --git a/library/cpp/actors/util/futex.h b/library/cpp/actors/util/futex.h
new file mode 100644
index 0000000000..c193f8d128
--- /dev/null
+++ b/library/cpp/actors/util/futex.h
@@ -0,0 +1,13 @@
+#pragma once
+
+#ifdef _linux_
+
+#include <linux/futex.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+
+static long SysFutex(void* addr1, int op, int val1, struct timespec* timeout, void* addr2, int val3) {
+ return syscall(SYS_futex, addr1, op, val1, timeout, addr2, val3);
+}
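+
+// A minimal usage sketch (illustrative): a wait/wake pair on a 32-bit word
+// using the standard FUTEX_WAIT/FUTEX_WAKE operations.
+//
+//   int word = 0;
+//   SysFutex(&word, FUTEX_WAIT, 0, nullptr, nullptr, 0); // sleeps while word == 0
+//   // ... another thread sets word = 1 and then:
+//   SysFutex(&word, FUTEX_WAKE, 1, nullptr, nullptr, 0); // wakes one waiter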
+
+#endif
diff --git a/library/cpp/actors/util/intrinsics.h b/library/cpp/actors/util/intrinsics.h
new file mode 100644
index 0000000000..df07e36896
--- /dev/null
+++ b/library/cpp/actors/util/intrinsics.h
@@ -0,0 +1,97 @@
+#pragma once
+
+#include <util/system/defaults.h>
+#include <util/system/atomic.h>
+#include <util/system/spinlock.h>
+
+#include <library/cpp/sse/sse.h> // The header chooses appropriate SSE support
+
+static_assert(sizeof(TAtomic) == 8, "expect sizeof(TAtomic) == 8");
+
+// We need explicit 32-bit operations to keep cache-line friendly packs,
+// so we have to define some atomics in addition to the Arcadia ones
+#ifdef _win_
+#pragma intrinsic(_InterlockedCompareExchange)
+#pragma intrinsic(_InterlockedExchangeAdd)
+#pragma intrinsic(_InterlockedIncrement)
+#pragma intrinsic(_InterlockedDecrement)
+#endif
+
+inline bool AtomicUi32Cas(volatile ui32* a, ui32 exchange, ui32 compare) {
+#ifdef _win_
+ return _InterlockedCompareExchange((volatile long*)a, exchange, compare) == (long)compare;
+#else
+ ui32 expected = compare;
+ return __atomic_compare_exchange_n(a, &expected, exchange, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
+#endif
+}
+
+inline ui32 AtomicUi32Add(volatile ui32* a, ui32 add) {
+#ifdef _win_
+ return _InterlockedExchangeAdd((volatile long*)a, add) + add;
+#else
+ return __atomic_add_fetch(a, add, __ATOMIC_SEQ_CST);
+#endif
+}
+
+inline ui32 AtomicUi32Sub(volatile ui32* a, ui32 sub) {
+#ifdef _win_
+ return _InterlockedExchangeAdd((volatile long*)a, -(long)sub) - sub;
+#else
+ return __atomic_sub_fetch(a, sub, __ATOMIC_SEQ_CST);
+#endif
+}
+
+inline ui32 AtomicUi32Increment(volatile ui32* a) {
+#ifdef _win_
+ return _InterlockedIncrement((volatile long*)a);
+#else
+ return __atomic_add_fetch(a, 1, __ATOMIC_SEQ_CST);
+#endif
+}
+
+inline ui32 AtomicUi32Decrement(volatile ui32* a) {
+#ifdef _win_
+ return _InterlockedDecrement((volatile long*)a);
+#else
+ return __atomic_sub_fetch(a, 1, __ATOMIC_SEQ_CST);
+#endif
+}
+
+template <typename T>
+inline void AtomicStore(volatile T* a, T x) {
+ static_assert(std::is_integral<T>::value || std::is_pointer<T>::value, "expect std::is_integral<T>::value || std::is_pointer<T>::value");
+#ifdef _win_
+ *a = x;
+#else
+ __atomic_store_n(a, x, __ATOMIC_RELEASE);
+#endif
+}
+
+template <typename T>
+inline void RelaxedStore(volatile T* a, T x) {
+ static_assert(std::is_integral<T>::value || std::is_pointer<T>::value, "expect std::is_integral<T>::value || std::is_pointer<T>::value");
+#ifdef _win_
+ *a = x;
+#else
+ __atomic_store_n(a, x, __ATOMIC_RELAXED);
+#endif
+}
+
+template <typename T>
+inline T AtomicLoad(volatile T* a) {
+#ifdef _win_
+ return *a;
+#else
+ return __atomic_load_n(a, __ATOMIC_ACQUIRE);
+#endif
+}
+
+template <typename T>
+inline T RelaxedLoad(volatile T* a) {
+#ifdef _win_
+ return *a;
+#else
+ return __atomic_load_n(a, __ATOMIC_RELAXED);
+#endif
+}
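+
+// A minimal usage sketch (illustrative): a 32-bit counter shared by threads.
+//
+//   volatile ui32 counter = 0;
+//   ui32 after = AtomicUi32Increment(&counter); // returns the new value
+//   AtomicUi32Cas(&counter, 0, 1);              // 1 -> 0 only if still equal to 1
+//   ui32 snapshot = AtomicLoad(&counter);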
diff --git a/library/cpp/actors/util/local_process_key.h b/library/cpp/actors/util/local_process_key.h
new file mode 100644
index 0000000000..172f08fc73
--- /dev/null
+++ b/library/cpp/actors/util/local_process_key.h
@@ -0,0 +1,132 @@
+#pragma once
+
+#include <util/string/builder.h>
+#include <util/generic/strbuf.h>
+#include <util/generic/vector.h>
+#include <util/generic/hash.h>
+#include <util/generic/singleton.h>
+#include <util/generic/serialized_enum.h>
+
+template <typename T>
+class TLocalProcessKeyState {
+
+template <typename U, const char* Name>
+friend class TLocalProcessKey;
+template <typename U, typename EnumT>
+friend class TEnumProcessKey;
+
+public:
+ static TLocalProcessKeyState& GetInstance() {
+ return *Singleton<TLocalProcessKeyState<T>>();
+ }
+
+ size_t GetCount() const {
+ return StartIndex + Names.size();
+ }
+
+ TStringBuf GetNameByIndex(size_t index) const {
+ if (index < StartIndex) {
+ return StaticNames[index];
+ } else {
+ index -= StartIndex;
+ Y_ENSURE(index < Names.size());
+ return Names[index];
+ }
+ }
+
+ size_t GetIndexByName(TStringBuf name) const {
+ auto it = Map.find(name);
+ Y_ENSURE(it != Map.end());
+ return it->second;
+ }
+
+private:
+ size_t Register(TStringBuf name) {
+ auto x = Map.emplace(name, Names.size()+StartIndex);
+ if (x.second) {
+ Names.emplace_back(name);
+ }
+
+ return x.first->second;
+ }
+
+ size_t Register(TStringBuf name, ui32 index) {
+ Y_VERIFY(index < StartIndex);
+ auto x = Map.emplace(name, index);
+ Y_VERIFY(x.second || x.first->second == index);
+ StaticNames[index] = name;
+ return x.first->second;
+ }
+
+private:
+ static constexpr ui32 StartIndex = 2000;
+
+ TVector<TString> FillStaticNames() {
+ TVector<TString> staticNames;
+ staticNames.reserve(StartIndex);
+ for (ui32 i = 0; i < StartIndex; i++) {
+ staticNames.push_back(TStringBuilder() << "Activity_" << i);
+ }
+ return staticNames;
+ }
+
+ TVector<TString> StaticNames = FillStaticNames();
+ TVector<TString> Names;
+ THashMap<TString, size_t> Map;
+};
+
+template <typename T, const char* Name>
+class TLocalProcessKey {
+public:
+ static TStringBuf GetName() {
+ return Name;
+ }
+
+ static size_t GetIndex() {
+ return Index;
+ }
+
+private:
+ inline static size_t Index = TLocalProcessKeyState<T>::GetInstance().Register(Name);
+};
+
+template <typename T, typename EnumT>
+class TEnumProcessKey {
+public:
+ static TStringBuf GetName(EnumT key) {
+ return TLocalProcessKeyState<T>::GetInstance().GetNameByIndex(GetIndex(key));
+ }
+
+ static size_t GetIndex(EnumT key) {
+ ui32 index = static_cast<ui32>(key);
+ if (index < TLocalProcessKeyState<T>::StartIndex) {
+ return index;
+ }
+ Y_VERIFY(index < Enum2Index.size());
+ return Enum2Index[index];
+ }
+
+private:
+ inline static TVector<size_t> RegisterAll() {
+ static_assert(std::is_enum<EnumT>::value, "Enum is required");
+
+ TVector<size_t> enum2Index;
+ auto names = GetEnumNames<EnumT>();
+ ui32 maxId = 0;
+ for (const auto& [k, v] : names) {
+ maxId = Max(maxId, static_cast<ui32>(k));
+ }
+ enum2Index.resize(maxId+1);
+ for (ui32 i = 0; i <= maxId && i < TLocalProcessKeyState<T>::StartIndex; i++) {
+ enum2Index[i] = i;
+ }
+
+ for (const auto& [k, v] : names) {
+ ui32 enumId = static_cast<ui32>(k);
+ enum2Index[enumId] = TLocalProcessKeyState<T>::GetInstance().Register(v, enumId);
+ }
+ return enum2Index;
+ }
+
+ inline static TVector<size_t> Enum2Index = RegisterAll();
+};
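+
+// A minimal usage sketch (illustrative; TMyTag and EMyActivity are
+// hypothetical, and the enum is assumed to have generated serialization so
+// that GetEnumNames<EMyActivity>() works):
+//
+//   struct TMyTag {};
+//   enum class EMyActivity : ui32 { First = 1, Second = 2 };
+//   size_t index = TEnumProcessKey<TMyTag, EMyActivity>::GetIndex(EMyActivity::First);
+//   TStringBuf name = TEnumProcessKey<TMyTag, EMyActivity>::GetName(EMyActivity::First);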
diff --git a/library/cpp/actors/util/named_tuple.h b/library/cpp/actors/util/named_tuple.h
new file mode 100644
index 0000000000..67f185bba8
--- /dev/null
+++ b/library/cpp/actors/util/named_tuple.h
@@ -0,0 +1,30 @@
+#pragma once
+
+#include "defs.h"
+
+template <typename TDerived>
+struct TNamedTupleBase {
+ friend bool operator==(const TDerived& x, const TDerived& y) {
+ return x.ConvertToTuple() == y.ConvertToTuple();
+ }
+
+ friend bool operator!=(const TDerived& x, const TDerived& y) {
+ return x.ConvertToTuple() != y.ConvertToTuple();
+ }
+
+ friend bool operator<(const TDerived& x, const TDerived& y) {
+ return x.ConvertToTuple() < y.ConvertToTuple();
+ }
+
+ friend bool operator<=(const TDerived& x, const TDerived& y) {
+ return x.ConvertToTuple() <= y.ConvertToTuple();
+ }
+
+ friend bool operator>(const TDerived& x, const TDerived& y) {
+ return x.ConvertToTuple() > y.ConvertToTuple();
+ }
+
+ friend bool operator>=(const TDerived& x, const TDerived& y) {
+ return x.ConvertToTuple() >= y.ConvertToTuple();
+ }
+};
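+
+// A minimal usage sketch (illustrative): derive from TNamedTupleBase and
+// expose ConvertToTuple to get all six comparison operators for free.
+//
+//   struct TPoint : TNamedTupleBase<TPoint> {
+//       int X = 0;
+//       int Y = 0;
+//       auto ConvertToTuple() const { return std::make_tuple(X, Y); }
+//   };
+//
+//   TPoint a, b;
+//   b.Y = 1;
+//   bool less = a < b; // true: compares (X, Y) lexicographically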
diff --git a/library/cpp/actors/util/queue_chunk.h b/library/cpp/actors/util/queue_chunk.h
new file mode 100644
index 0000000000..8a4e02d8cb
--- /dev/null
+++ b/library/cpp/actors/util/queue_chunk.h
@@ -0,0 +1,29 @@
+#pragma once
+
+#include "defs.h"
+
+template <typename T, ui32 TSize, typename TDerived>
+struct TQueueChunkDerived {
+ static const ui32 EntriesCount = (TSize - sizeof(TQueueChunkDerived*)) / sizeof(T);
+ static_assert(EntriesCount > 0, "expect EntriesCount > 0");
+
+ volatile T Entries[EntriesCount];
+ TDerived* volatile Next;
+
+ TQueueChunkDerived() {
+ memset(this, 0, sizeof(TQueueChunkDerived));
+ }
+};
+
+template <typename T, ui32 TSize>
+struct TQueueChunk {
+ static const ui32 EntriesCount = (TSize - sizeof(TQueueChunk*)) / sizeof(T);
+ static_assert(EntriesCount > 0, "expect EntriesCount > 0");
+
+ volatile T Entries[EntriesCount];
+ TQueueChunk* volatile Next;
+
+ TQueueChunk() {
+ memset(this, 0, sizeof(TQueueChunk));
+ }
+};
diff --git a/library/cpp/actors/util/queue_oneone_inplace.h b/library/cpp/actors/util/queue_oneone_inplace.h
new file mode 100644
index 0000000000..d7ec8bb21c
--- /dev/null
+++ b/library/cpp/actors/util/queue_oneone_inplace.h
@@ -0,0 +1,118 @@
+#pragma once
+
+#include "defs.h"
+#include "queue_chunk.h"
+
+template <typename T, ui32 TSize, typename TChunk = TQueueChunk<T, TSize>>
+class TOneOneQueueInplace : TNonCopyable {
+ static_assert(std::is_integral<T>::value || std::is_pointer<T>::value, "expect std::is_integral<T>::value || std::is_pointer<T>::value");
+
+ TChunk* ReadFrom;
+ ui32 ReadPosition;
+ ui32 WritePosition;
+ TChunk* WriteTo;
+
+ friend class TReadIterator;
+
+public:
+ class TReadIterator {
+ TChunk* ReadFrom;
+ ui32 ReadPosition;
+
+ public:
+ TReadIterator(TChunk* readFrom, ui32 readPosition)
+ : ReadFrom(readFrom)
+ , ReadPosition(readPosition)
+ {
+ }
+
+ inline T Next() {
+ TChunk* head = ReadFrom;
+ if (ReadPosition != TChunk::EntriesCount) {
+ return AtomicLoad(&head->Entries[ReadPosition++]);
+ } else if (TChunk* next = AtomicLoad(&head->Next)) {
+ ReadFrom = next;
+ ReadPosition = 0;
+ return Next();
+ }
+ return T{};
+ }
+ };
+
+ TOneOneQueueInplace()
+ : ReadFrom(new TChunk())
+ , ReadPosition(0)
+ , WritePosition(0)
+ , WriteTo(ReadFrom)
+ {
+ }
+
+ ~TOneOneQueueInplace() {
+ Y_VERIFY_DEBUG(Head() == 0);
+ delete ReadFrom;
+ }
+
+ struct TPtrCleanDestructor {
+ static inline void Destroy(TOneOneQueueInplace<T, TSize>* x) noexcept {
+ while (T head = x->Pop())
+ delete head;
+ delete x;
+ }
+ };
+
+ struct TCleanDestructor {
+ static inline void Destroy(TOneOneQueueInplace<T, TSize>* x) noexcept {
+ while (x->Pop() != nullptr)
+ continue;
+ delete x;
+ }
+ };
+
+ struct TPtrCleanInplaceMallocDestructor {
+ template <typename TPtrVal>
+ static inline void Destroy(TOneOneQueueInplace<TPtrVal*, TSize>* x) noexcept {
+ while (TPtrVal* head = x->Pop()) {
+ head->~TPtrVal();
+ free(head);
+ }
+ delete x;
+ }
+ };
+
+ void Push(T x) noexcept {
+ if (WritePosition != TChunk::EntriesCount) {
+ AtomicStore(&WriteTo->Entries[WritePosition], x);
+ ++WritePosition;
+ } else {
+ TChunk* next = new TChunk();
+ next->Entries[0] = x;
+ AtomicStore(&WriteTo->Next, next);
+ WriteTo = next;
+ WritePosition = 1;
+ }
+ }
+
+ T Head() {
+ TChunk* head = ReadFrom;
+ if (ReadPosition != TChunk::EntriesCount) {
+ return AtomicLoad(&head->Entries[ReadPosition]);
+ } else if (TChunk* next = AtomicLoad(&head->Next)) {
+ ReadFrom = next;
+ delete head;
+ ReadPosition = 0;
+ return Head();
+ }
+ return T{};
+ }
+
+ T Pop() {
+ T ret = Head();
+ if (ret)
+ ++ReadPosition;
+ return ret;
+ }
+
+ TReadIterator Iterator() {
+ return TReadIterator(ReadFrom, ReadPosition);
+ }
+};
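+
+// A minimal usage sketch (illustrative; TEvent is hypothetical): a
+// single-producer/single-consumer queue of pointers whose remaining pointees
+// are deleted together with the queue.
+//
+//   using TQueue = TOneOneQueueInplace<TEvent*, 32>;
+//   TAutoPtr<TQueue, TQueue::TPtrCleanDestructor> queue(new TQueue);
+//   queue->Push(new TEvent);          // producer thread
+//   while (TEvent* ev = queue->Pop()) // consumer thread
+//       delete ev;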
diff --git a/library/cpp/actors/util/recentwnd.h b/library/cpp/actors/util/recentwnd.h
new file mode 100644
index 0000000000..ba1ede6f29
--- /dev/null
+++ b/library/cpp/actors/util/recentwnd.h
@@ -0,0 +1,67 @@
+#pragma once
+
+#include <util/generic/deque.h>
+
+template <typename TElem,
+ template <typename, typename...> class TContainer = TDeque>
+class TRecentWnd {
+public:
+ TRecentWnd(ui32 wndSize)
+ : MaxWndSize_(wndSize)
+ {
+ }
+
+ void Push(const TElem& elem) {
+ if (Window_.size() == MaxWndSize_)
+ Window_.erase(Window_.begin());
+ Window_.emplace_back(elem);
+ }
+
+ void Push(TElem&& elem) {
+ if (Window_.size() == MaxWndSize_)
+ Window_.erase(Window_.begin());
+ Window_.emplace_back(std::move(elem));
+ }
+
+ TElem& Last() {
+ return Window_.back();
+ }
+ const TElem& Last() const {
+ return Window_.back();
+ }
+ bool Full() const {
+ return Window_.size() == MaxWndSize_;
+ }
+ ui64 Size() const {
+ return Window_.size();
+ }
+
+ using const_iterator = typename TContainer<TElem>::const_iterator;
+
+ const_iterator begin() {
+ return Window_.begin();
+ }
+ const_iterator end() {
+ return Window_.end();
+ }
+
+ void Reset(ui32 wndSize = 0) {
+ Window_.clear();
+ if (wndSize != 0) {
+ MaxWndSize_ = wndSize;
+ }
+ }
+
+ void ResetWnd(ui32 wndSize) {
+ Y_VERIFY(wndSize != 0);
+ MaxWndSize_ = wndSize;
+ if (Window_.size() > MaxWndSize_) {
+ Window_.erase(Window_.begin(),
+ Window_.begin() + Window_.size() - MaxWndSize_);
+ }
+ }
+
+private:
+ TContainer<TElem> Window_;
+ ui32 MaxWndSize_;
+};
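+
+// A minimal usage sketch (illustrative): keep only the most recent 16 latency
+// samples and inspect them once the window is full.
+//
+//   TRecentWnd<TDuration> window(16);
+//   window.Push(TDuration::MilliSeconds(5));
+//   if (window.Full()) {
+//       for (const TDuration& sample : window) { /* aggregate */ }
+//   }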
diff --git a/library/cpp/actors/util/rope.h b/library/cpp/actors/util/rope.h
new file mode 100644
index 0000000000..f5595efbaa
--- /dev/null
+++ b/library/cpp/actors/util/rope.h
@@ -0,0 +1,1161 @@
+#pragma once
+
+#include <util/generic/ptr.h>
+#include <util/generic/string.h>
+#include <util/generic/hash_set.h>
+#include <util/stream/str.h>
+#include <util/system/sanitizers.h>
+#include <util/system/valgrind.h>
+
+// exactly one of them must be included
+#include "rope_cont_list.h"
+//#include "rope_cont_deque.h"
+
+struct IRopeChunkBackend : TThrRefBase {
+ using TData = std::tuple<const char*, size_t>;
+ virtual ~IRopeChunkBackend() = default;
+ virtual TData GetData() const = 0;
+ virtual size_t GetCapacity() const = 0;
+ using TPtr = TIntrusivePtr<IRopeChunkBackend>;
+};
+
+class TRopeAlignedBuffer : public IRopeChunkBackend {
+ static constexpr size_t Alignment = 16;
+ static constexpr size_t MallocAlignment = sizeof(size_t);
+
+ ui32 Size;
+ const ui32 Capacity;
+ const ui32 Offset;
+ alignas(Alignment) char Data[];
+
+ TRopeAlignedBuffer(size_t size)
+ : Size(size)
+ , Capacity(size)
+ , Offset((Alignment - reinterpret_cast<uintptr_t>(Data)) & (Alignment - 1))
+ {
+ Y_VERIFY(Offset <= Alignment - MallocAlignment);
+ }
+
+public:
+ static TIntrusivePtr<TRopeAlignedBuffer> Allocate(size_t size) {
+ return new(malloc(sizeof(TRopeAlignedBuffer) + size + Alignment - MallocAlignment)) TRopeAlignedBuffer(size);
+ }
+
+ void *operator new(size_t) {
+ Y_FAIL();
+ }
+
+ void *operator new(size_t, void *ptr) {
+ return ptr;
+ }
+
+ void operator delete(void *ptr) {
+ free(ptr);
+ }
+
+ void operator delete(void* p, void* ptr) {
+ Y_UNUSED(p);
+ Y_UNUSED(ptr);
+ }
+
+ TData GetData() const override {
+ return {Data + Offset, Size};
+ }
+
+ size_t GetCapacity() const override {
+ return Capacity;
+ }
+
+ char *GetBuffer() {
+ return Data + Offset;
+ }
+
+ void AdjustSize(size_t size) {
+ Y_VERIFY(size <= Capacity);
+ Size = size;
+ }
+};
+
+namespace NRopeDetails {
+
+ template<bool IsConst, typename TRope, typename TList>
+ struct TIteratorTraits;
+
+ template<typename TRope, typename TList>
+ struct TIteratorTraits<true, TRope, TList> {
+ using TRopePtr = const TRope*;
+ using TListIterator = typename TList::const_iterator;
+ };
+
+ template<typename TRope, typename TList>
+ struct TIteratorTraits<false, TRope, TList> {
+ using TRopePtr = TRope*;
+ using TListIterator = typename TList::iterator;
+ };
+
+} // NRopeDetails
+
+class TRopeArena;
+
+template<typename T>
+struct always_false : std::false_type {};
+
+class TRope {
+ friend class TRopeArena;
+
+ struct TChunk
+ {
+ class TBackend {
+ enum class EType : uintptr_t {
+ STRING,
+ ROPE_CHUNK_BACKEND,
+ };
+
+ uintptr_t Owner = 0; // lower bits contain type of the owner
+
+ public:
+ TBackend() = delete;
+
+ TBackend(const TBackend& other)
+ : Owner(Clone(other.Owner))
+ {}
+
+ TBackend(TBackend&& other)
+ : Owner(std::exchange(other.Owner, 0))
+ {}
+
+ TBackend(TString s)
+ : Owner(Construct<TString>(EType::STRING, std::move(s)))
+ {}
+
+ TBackend(IRopeChunkBackend::TPtr backend)
+ : Owner(Construct<IRopeChunkBackend::TPtr>(EType::ROPE_CHUNK_BACKEND, std::move(backend)))
+ {}
+
+ ~TBackend() {
+ if (Owner) {
+ Destroy(Owner);
+ }
+ }
+
+ TBackend& operator =(const TBackend& other) {
+ if (Owner) {
+ Destroy(Owner);
+ }
+ Owner = Clone(other.Owner);
+ return *this;
+ }
+
+ TBackend& operator =(TBackend&& other) {
+ if (Owner) {
+ Destroy(Owner);
+ }
+ Owner = std::exchange(other.Owner, 0);
+ return *this;
+ }
+
+ bool operator ==(const TBackend& other) const {
+ return Owner == other.Owner;
+ }
+
+ const void *UniqueId() const {
+ return reinterpret_cast<const void*>(Owner);
+ }
+
+ const IRopeChunkBackend::TData GetData() const {
+ return Visit(Owner, [](EType, auto& value) -> IRopeChunkBackend::TData {
+ using T = std::decay_t<decltype(value)>;
+ if constexpr (std::is_same_v<T, TString>) {
+ return {value.data(), value.size()};
+ } else if constexpr (std::is_same_v<T, IRopeChunkBackend::TPtr>) {
+ return value->GetData();
+ } else {
+ return {};
+ }
+ });
+ }
+
+ size_t GetCapacity() const {
+ return Visit(Owner, [](EType, auto& value) {
+ using T = std::decay_t<decltype(value)>;
+ if constexpr (std::is_same_v<T, TString>) {
+ return value.capacity();
+ } else if constexpr (std::is_same_v<T, IRopeChunkBackend::TPtr>) {
+ return value->GetCapacity();
+ } else {
+ Y_FAIL();
+ }
+ });
+ }
+
+ private:
+ static constexpr uintptr_t TypeMask = (1 << 3) - 1;
+ static constexpr uintptr_t ValueMask = ~TypeMask;
+
+ template<typename T>
+ struct TObjectHolder {
+ struct TWrappedObject : TThrRefBase {
+ T Value;
+ TWrappedObject(T&& value)
+ : Value(std::move(value))
+ {}
+ };
+ TIntrusivePtr<TWrappedObject> Object;
+
+ TObjectHolder(T&& object)
+ : Object(MakeIntrusive<TWrappedObject>(std::move(object)))
+ {}
+ };
+
+ template<typename TObject>
+ static uintptr_t Construct(EType type, TObject object) {
+ if constexpr (sizeof(TObject) <= sizeof(uintptr_t)) {
+ uintptr_t res = 0;
+ new(&res) TObject(std::move(object));
+ Y_VERIFY_DEBUG((res & ValueMask) == res);
+ return res | static_cast<uintptr_t>(type);
+ } else {
+ return Construct<TObjectHolder<TObject>>(type, TObjectHolder<TObject>(std::move(object)));
+ }
+ }
+
+ template<typename TCallback>
+ static std::invoke_result_t<TCallback, EType, TString&> VisitRaw(uintptr_t value, TCallback&& callback) {
+ Y_VERIFY_DEBUG(value);
+ const EType type = static_cast<EType>(value & TypeMask);
+ value &= ValueMask;
+ auto caller = [&](auto& value) { return std::invoke(std::forward<TCallback>(callback), type, value); };
+ auto wrapper = [&](auto& value) {
+ using T = std::decay_t<decltype(value)>;
+ if constexpr (sizeof(T) <= sizeof(uintptr_t)) {
+ return caller(value);
+ } else {
+ return caller(reinterpret_cast<TObjectHolder<T>&>(value));
+ }
+ };
+ switch (type) {
+ case EType::STRING: return wrapper(reinterpret_cast<TString&>(value));
+ case EType::ROPE_CHUNK_BACKEND: return wrapper(reinterpret_cast<IRopeChunkBackend::TPtr&>(value));
+ }
+ Y_FAIL("Unexpected type# %" PRIu64, static_cast<ui64>(type));
+ }
+
+ template<typename TCallback>
+ static std::invoke_result_t<TCallback, EType, TString&> Visit(uintptr_t value, TCallback&& callback) {
+ return VisitRaw(value, [&](EType type, auto& value) {
+ return std::invoke(std::forward<TCallback>(callback), type, Unwrap(value));
+ });
+ }
+
+ template<typename T> static T& Unwrap(T& object) { return object; }
+ template<typename T> static T& Unwrap(TObjectHolder<T>& holder) { return holder.Object->Value; }
+
+ static uintptr_t Clone(uintptr_t value) {
+ return VisitRaw(value, [](EType type, auto& value) { return Construct(type, value); });
+ }
+
+ static void Destroy(uintptr_t value) {
+ VisitRaw(value, [](EType, auto& value) { CallDtor(value); });
+ }
+
+ template<typename T>
+ static void CallDtor(T& value) {
+ value.~T();
+ }
+ };
+
+ TBackend Backend; // who actually holds the data
+ const char *Begin; // data start
+ const char *End; // data end
+
+ static constexpr struct TSlice {} Slice{};
+
+ template<typename T>
+ TChunk(T&& backend, const IRopeChunkBackend::TData& data)
+ : Backend(std::move(backend))
+ , Begin(std::get<0>(data))
+ , End(Begin + std::get<1>(data))
+ {
+ Y_VERIFY_DEBUG(Begin != End);
+ }
+
+ TChunk(TString s)
+ : Backend(std::move(s))
+ {
+ size_t size;
+ std::tie(Begin, size) = Backend.GetData();
+ End = Begin + size;
+ }
+
+ TChunk(IRopeChunkBackend::TPtr backend)
+ : TChunk(backend, backend->GetData())
+ {}
+
+ TChunk(TSlice, const char *data, size_t size, const TChunk& from)
+ : TChunk(from.Backend, {data, size})
+ {}
+
+ TChunk(TSlice, const char *begin, const char *end, const TChunk& from)
+ : TChunk(Slice, begin, end - begin, from)
+ {}
+
+ explicit TChunk(const TChunk& other)
+ : Backend(other.Backend)
+ , Begin(other.Begin)
+ , End(other.End)
+ {}
+
+ TChunk(TChunk&& other)
+ : Backend(std::move(other.Backend))
+ , Begin(other.Begin)
+ , End(other.End)
+ {}
+
+ TChunk& operator =(const TChunk&) = default;
+ TChunk& operator =(TChunk&&) = default;
+
+ size_t GetSize() const {
+ return End - Begin;
+ }
+
+ static void Clear(TChunk& chunk) {
+ chunk.Begin = nullptr;
+ }
+
+ static bool IsInUse(const TChunk& chunk) {
+ return chunk.Begin != nullptr;
+ }
+
+ size_t GetCapacity() const {
+ return Backend.GetCapacity();
+ }
+ };
+
+ using TChunkList = NRopeDetails::TChunkList<TChunk>;
+
+private:
+ // We use a list here to store the chain items because we must keep iterators valid across erase/insert
+ // operations; the rope iterator wraps the underlying container's iterator, so the container has to
+ // preserve iterator validity on deletion; hence the list.
+ TChunkList Chain;
+ size_t Size = 0;
+
+private:
+ template<bool IsConst>
+ class TIteratorImpl {
+ using TTraits = NRopeDetails::TIteratorTraits<IsConst, TRope, TChunkList>;
+
+ typename TTraits::TRopePtr Rope;
+ typename TTraits::TListIterator Iter;
+ const char *Ptr; // ptr is always nullptr when iterator is positioned at the rope end
+
+#ifndef NDEBUG
+ ui32 ValidityToken;
+#endif
+
+ private:
+ TIteratorImpl(typename TTraits::TRopePtr rope, typename TTraits::TListIterator iter, const char *ptr = nullptr)
+ : Rope(rope)
+ , Iter(iter)
+ , Ptr(ptr)
+#ifndef NDEBUG
+ , ValidityToken(Rope->GetValidityToken())
+#endif
+ {}
+
+ public:
+ TIteratorImpl()
+ : Rope(nullptr)
+ , Ptr(nullptr)
+ {}
+
+ template<bool IsOtherConst>
+ TIteratorImpl(const TIteratorImpl<IsOtherConst>& other)
+ : Rope(other.Rope)
+ , Iter(other.Iter)
+ , Ptr(other.Ptr)
+#ifndef NDEBUG
+ , ValidityToken(other.ValidityToken)
+#endif
+ {}
+
+ void CheckValid() const {
+#ifndef NDEBUG
+ Y_VERIFY(ValidityToken == Rope->GetValidityToken());
+#endif
+ }
+
+ TIteratorImpl& operator +=(size_t amount) {
+ CheckValid();
+
+ while (amount) {
+ Y_VERIFY_DEBUG(Valid());
+ const size_t max = ContiguousSize();
+ const size_t num = std::min(amount, max);
+ amount -= num;
+ Ptr += num;
+ if (Ptr == Iter->End) {
+ AdvanceToNextContiguousBlock();
+ }
+ }
+
+ return *this;
+ }
+
+ TIteratorImpl operator +(size_t amount) const {
+ CheckValid();
+
+ return TIteratorImpl(*this) += amount;
+ }
+
+ TIteratorImpl& operator -=(size_t amount) {
+ CheckValid();
+
+ while (amount) {
+ const size_t num = Ptr ? std::min<size_t>(amount, Ptr - Iter->Begin) : 0;
+ amount -= num;
+ Ptr -= num;
+ if (amount) {
+ Y_VERIFY_DEBUG(Iter != GetChainBegin());
+ --Iter;
+ Ptr = Iter->End;
+ }
+ }
+
+ return *this;
+ }
+
+ TIteratorImpl operator -(size_t amount) const {
+ CheckValid();
+ return TIteratorImpl(*this) -= amount;
+ }
+
+ std::pair<const char*, size_t> operator *() const {
+ return {ContiguousData(), ContiguousSize()};
+ }
+
+ TIteratorImpl& operator ++() {
+ AdvanceToNextContiguousBlock();
+ return *this;
+ }
+
+ TIteratorImpl operator ++(int) const {
+ auto it(*this);
+ it.AdvanceToNextContiguousBlock();
+ return it;
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // Operation with contiguous data
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ // Get the pointer to the contiguous block of data; valid locations are [ContiguousData(), ContiguousData() + ContiguousSize()).
+ const char *ContiguousData() const {
+ CheckValid();
+ return Ptr;
+ }
+
+ // Get the size of the contiguous block.
+ size_t ContiguousSize() const {
+ CheckValid();
+ return Ptr ? Iter->End - Ptr : 0;
+ }
+
+ size_t ChunkOffset() const {
+ return Ptr ? Ptr - Iter->Begin : 0;
+ }
+
+ // Advance to next contiguous block of data.
+ void AdvanceToNextContiguousBlock() {
+ CheckValid();
+ Y_VERIFY_DEBUG(Valid());
+ ++Iter;
+ Ptr = Iter != GetChainEnd() ? Iter->Begin : nullptr;
+ }
+
+ // Extract some data and advance. The length is not checked here, so it must be valid.
+ void ExtractPlainDataAndAdvance(void *buffer, size_t len) {
+ CheckValid();
+
+ while (len) {
+ Y_VERIFY_DEBUG(Ptr);
+
+ // calculate amount of bytes we need to move
+ const size_t max = ContiguousSize();
+ const size_t num = std::min(len, max);
+
+ // copy data to the buffer and advance buffer pointers
+ memcpy(buffer, Ptr, num);
+ buffer = static_cast<char*>(buffer) + num;
+ len -= num;
+
+ // advance iterator itself
+ Ptr += num;
+ if (Ptr == Iter->End) {
+ AdvanceToNextContiguousBlock();
+ }
+ }
+ }
+
+ // Returns true unless the iterator points to the end of the rope.
+ bool Valid() const {
+ CheckValid();
+ return Ptr;
+ }
+
+ template<bool IsOtherConst>
+ bool operator ==(const TIteratorImpl<IsOtherConst>& other) const {
+ Y_VERIFY_DEBUG(Rope == other.Rope);
+ CheckValid();
+ other.CheckValid();
+ return Iter == other.Iter && Ptr == other.Ptr;
+ }
+
+ template<bool IsOtherConst>
+ bool operator !=(const TIteratorImpl<IsOtherConst>& other) const {
+ CheckValid();
+ other.CheckValid();
+ return !(*this == other);
+ }
+
+ private:
+ friend class TRope;
+
+ typename TTraits::TListIterator operator ->() const {
+ CheckValid();
+ return Iter;
+ }
+
+ const TChunk& GetChunk() const {
+ CheckValid();
+ return *Iter;
+ }
+
+ typename TTraits::TListIterator GetChainBegin() const {
+ CheckValid();
+ return Rope->Chain.begin();
+ }
+
+ typename TTraits::TListIterator GetChainEnd() const {
+ CheckValid();
+ return Rope->Chain.end();
+ }
+
+ bool PointsToChunkMiddle() const {
+ CheckValid();
+ return Ptr && Ptr != Iter->Begin;
+ }
+ };
+
+public:
+#ifndef NDEBUG
+ ui32 ValidityToken = 0;
+ ui32 GetValidityToken() const { return ValidityToken; }
+ void InvalidateIterators() { ++ValidityToken; }
+#else
+ void InvalidateIterators() {}
+#endif
+
+public:
+ using TConstIterator = TIteratorImpl<true>;
+ using TIterator = TIteratorImpl<false>;
+
+public:
+ TRope() = default;
+ TRope(const TRope& rope) = default;
+
+ TRope(TRope&& rope)
+ : Chain(std::move(rope.Chain))
+ , Size(std::exchange(rope.Size, 0))
+ {
+ rope.InvalidateIterators();
+ }
+
+ TRope(TString s) {
+ if (s) {
+ Size = s.size();
+ s.reserve(32);
+ Chain.PutToEnd(std::move(s));
+ }
+ }
+
+ TRope(IRopeChunkBackend::TPtr item) {
+ std::tie(std::ignore, Size) = item->GetData();
+ Chain.PutToEnd(std::move(item));
+ }
+
+ TRope(TConstIterator begin, TConstIterator end) {
+ Y_VERIFY_DEBUG(begin.Rope == end.Rope);
+ if (begin.Rope == this) {
+ TRope temp(begin, end);
+ *this = std::move(temp);
+ return;
+ }
+
+ while (begin.Iter != end.Iter) {
+ const size_t size = begin.ContiguousSize();
+ Chain.PutToEnd(TChunk::Slice, begin.ContiguousData(), size, begin.GetChunk());
+ begin.AdvanceToNextContiguousBlock();
+ Size += size;
+ }
+
+ if (begin != end && end.PointsToChunkMiddle()) {
+ Chain.PutToEnd(TChunk::Slice, begin.Ptr, end.Ptr, begin.GetChunk());
+ Size += end.Ptr - begin.Ptr;
+ }
+ }
+
+ ~TRope() {
+ }
+
+ // creates a copy of the rope; chunks with an inefficient storage ratio are re-copied through the arena allocator
+ static TRope CopySpaceOptimized(TRope&& origin, size_t worstRatioPer1k, TRopeArena& arena);
+
+ TRope& operator=(const TRope& other) {
+ Chain = other.Chain;
+ Size = other.Size;
+ return *this;
+ }
+
+ TRope& operator=(TRope&& other) {
+ Chain = std::move(other.Chain);
+ Size = std::exchange(other.Size, 0);
+ InvalidateIterators();
+ other.InvalidateIterators();
+ return *this;
+ }
+
+ size_t GetSize() const {
+ return Size;
+ }
+
+ bool IsEmpty() const {
+ return !Size;
+ }
+
+ operator bool() const {
+ return Chain;
+ }
+
+ TIterator Begin() {
+ return *this ? TIterator(this, Chain.begin(), Chain.GetFirstChunk().Begin) : End();
+ }
+
+ TIterator End() {
+ return TIterator(this, Chain.end());
+ }
+
+ TIterator Iterator(TChunkList::iterator it) {
+ return TIterator(this, it, it != Chain.end() ? it->Begin : nullptr);
+ }
+
+ TIterator Position(size_t index) {
+ return Begin() + index;
+ }
+
+ TConstIterator Begin() const {
+ return *this ? TConstIterator(this, Chain.begin(), Chain.GetFirstChunk().Begin) : End();
+ }
+
+ TConstIterator End() const {
+ return TConstIterator(this, Chain.end());
+ }
+
+ TConstIterator Position(size_t index) const {
+ return Begin() + index;
+ }
+
+ TConstIterator begin() const { return Begin(); }
+ TConstIterator end() const { return End(); }
+
+ void Erase(TIterator begin, TIterator end) {
+ Cut(begin, end, nullptr);
+ }
+
+ TRope Extract(TIterator begin, TIterator end) {
+ TRope res;
+ Cut(begin, end, &res);
+ return res;
+ }
+
+ void ExtractFront(size_t num, TRope *dest) {
+ Y_VERIFY(Size >= num);
+ if (num == Size && !*dest) {
+ *dest = std::move(*this);
+ return;
+ }
+ Size -= num;
+ dest->Size += num;
+ TChunkList::iterator it, first = Chain.begin();
+ for (it = first; num && num >= it->GetSize(); ++it) {
+ num -= it->GetSize();
+ }
+ if (it != first) {
+ if (dest->Chain) {
+ auto& last = dest->Chain.GetLastChunk();
+ if (last.Backend == first->Backend && last.End == first->Begin) {
+ last.End = first->End;
+ first = Chain.Erase(first); // TODO(alexvru): "it" gets invalidated here on some containers
+ }
+ }
+ dest->Chain.Splice(dest->Chain.end(), Chain, first, it);
+ }
+ if (num) {
+ auto it = Chain.begin();
+ if (dest->Chain) {
+ auto& last = dest->Chain.GetLastChunk();
+ if (last.Backend == first->Backend && last.End == first->Begin) {
+ first->Begin += num;
+ last.End = first->Begin;
+ return;
+ }
+ }
+ dest->Chain.PutToEnd(TChunk::Slice, it->Begin, it->Begin + num, *it);
+ it->Begin += num;
+ }
+ }
+
+ void Insert(TIterator pos, TRope&& rope) {
+ Y_VERIFY_DEBUG(this == pos.Rope);
+ Y_VERIFY_DEBUG(this != &rope);
+
+ if (!rope) {
+ return; // do nothing for empty rope
+ }
+
+ // adjust size
+ Size += std::exchange(rope.Size, 0);
+
+ // check if we have to split the block
+ if (pos.PointsToChunkMiddle()) {
+ pos.Iter = Chain.InsertBefore(pos.Iter, TChunk::Slice, pos->Begin, pos.Ptr, pos.GetChunk());
+ ++pos.Iter;
+ pos->Begin = pos.Ptr;
+ }
+
+ // glue adjacent chunks where possible
+ TChunk *ropeLeft = &rope.Chain.GetFirstChunk();
+ TChunk *ropeRight = &rope.Chain.GetLastChunk();
+ bool gluedLeft = false, gluedRight = false;
+ if (pos.Iter != Chain.begin()) { // glue left part whenever possible
+ // obtain iterator to previous chunk
+ auto prev(pos.Iter);
+ --prev;
+ if (prev->End == ropeLeft->Begin && prev->Backend == ropeLeft->Backend) { // it is glueable
+ prev->End = ropeLeft->End;
+ gluedLeft = true;
+ }
+ }
+ if (pos.Iter != Chain.end() && ropeRight->End == pos->Begin && ropeRight->Backend == pos->Backend) {
+ pos->Begin = ropeRight->Begin;
+ gluedRight = true;
+ }
+ if (gluedLeft) {
+ rope.Chain.EraseFront();
+ }
+ if (gluedRight) {
+ if (rope) {
+ rope.Chain.EraseBack();
+ } else { // the same chunk was glued on both sides; drop the previous one
+ auto prev(pos.Iter);
+ --prev;
+ pos->Begin = prev->Begin;
+ pos.Iter = Chain.Erase(prev);
+ }
+ }
+ if (rope) { // insert remains
+ Chain.Splice(pos.Iter, rope.Chain, rope.Chain.begin(), rope.Chain.end());
+ }
+ Y_VERIFY_DEBUG(!rope);
+ InvalidateIterators();
+ }
+
+ void EraseFront(size_t len) {
+ Y_VERIFY_DEBUG(Size >= len);
+ Size -= len;
+
+ while (len) {
+ Y_VERIFY_DEBUG(Chain);
+ TChunk& item = Chain.GetFirstChunk();
+ const size_t itemSize = item.GetSize();
+ if (len >= itemSize) {
+ Chain.EraseFront();
+ len -= itemSize;
+ } else {
+ item.Begin += len;
+ break;
+ }
+ }
+
+ InvalidateIterators();
+ }
+
+ void EraseBack(size_t len) {
+ Y_VERIFY_DEBUG(Size >= len);
+ Size -= len;
+
+ while (len) {
+ Y_VERIFY_DEBUG(Chain);
+ TChunk& item = Chain.GetLastChunk();
+ const size_t itemSize = item.GetSize();
+ if (len >= itemSize) {
+ Chain.EraseBack();
+ len -= itemSize;
+ } else {
+ item.End -= len;
+ break;
+ }
+ }
+
+ InvalidateIterators();
+ }
+
+ bool ExtractFrontPlain(void *buffer, size_t len) {
+ // check if we have enough data in the rope
+ if (Size < len) {
+ return false;
+ }
+ Size -= len;
+ while (len) {
+ auto& chunk = Chain.GetFirstChunk();
+ const size_t num = Min(len, chunk.GetSize());
+ memcpy(buffer, chunk.Begin, num);
+ buffer = static_cast<char*>(buffer) + num;
+ len -= num;
+ chunk.Begin += num;
+ if (chunk.Begin == chunk.End) {
+ Chain.Erase(Chain.begin());
+ }
+ }
+ InvalidateIterators();
+ return true;
+ }
+
+ bool FetchFrontPlain(char **ptr, size_t *remain) {
+ const size_t num = Min(*remain, Size);
+ ExtractFrontPlain(*ptr, num);
+ *ptr += num;
+ *remain -= num;
+ return !*remain;
+ }
+
+ static int Compare(const TRope& x, const TRope& y) {
+ TConstIterator xIter = x.Begin(), yIter = y.Begin();
+ while (xIter.Valid() && yIter.Valid()) {
+ const size_t step = std::min(xIter.ContiguousSize(), yIter.ContiguousSize());
+ if (int res = memcmp(xIter.ContiguousData(), yIter.ContiguousData(), step)) {
+ return res;
+ }
+ xIter += step;
+ yIter += step;
+ }
+ return xIter.Valid() - yIter.Valid();
+ }
+
+ // Use this method carefully -- it may significantly reduce performance when misused.
+ TString ConvertToString() const {
+ TString res = TString::Uninitialized(GetSize());
+ Begin().ExtractPlainDataAndAdvance(res.Detach(), res.size());
+ return res;
+ }
+
+ TString DebugString() const {
+ TStringStream s;
+ s << "{Size# " << Size;
+ for (const auto& chunk : Chain) {
+ const char *data;
+ std::tie(data, std::ignore) = chunk.Backend.GetData();
+ s << " [" << chunk.Begin - data << ", " << chunk.End - data << ")@" << chunk.Backend.UniqueId();
+ }
+ s << "}";
+ return s.Str();
+ }
+
+ friend bool operator==(const TRope& x, const TRope& y) { return Compare(x, y) == 0; }
+ friend bool operator!=(const TRope& x, const TRope& y) { return Compare(x, y) != 0; }
+ friend bool operator< (const TRope& x, const TRope& y) { return Compare(x, y) < 0; }
+ friend bool operator<=(const TRope& x, const TRope& y) { return Compare(x, y) <= 0; }
+ friend bool operator> (const TRope& x, const TRope& y) { return Compare(x, y) > 0; }
+ friend bool operator>=(const TRope& x, const TRope& y) { return Compare(x, y) >= 0; }
+
+private:
+ void Cut(TIterator begin, TIterator end, TRope *target) {
+ // ensure all iterators belong to us
+ Y_VERIFY_DEBUG(this == begin.Rope && this == end.Rope);
+
+ // if begin and end are equal, we do nothing -- checking this case allows us to find out that begin does not
+ // point to End(), for example
+ if (begin == end) {
+ return;
+ }
+
+ auto addBlock = [&](const TChunk& from, const char *begin, const char *end) {
+ if (target) {
+ target->Chain.PutToEnd(TChunk::Slice, begin, end, from);
+ target->Size += end - begin;
+ }
+ Size -= end - begin;
+ };
+
+ // consider special case -- when begin and end point to the same block; in this case we have to split up this
+ // block into two parts
+ if (begin.Iter == end.Iter) {
+ addBlock(begin.GetChunk(), begin.Ptr, end.Ptr);
+ const char *firstChunkBegin = begin.PointsToChunkMiddle() ? begin->Begin : nullptr;
+ begin->Begin = end.Ptr; // this affects both begin and end iterator pointed values
+ if (firstChunkBegin) {
+ Chain.InsertBefore(begin.Iter, TChunk::Slice, firstChunkBegin, begin.Ptr, begin.GetChunk());
+ }
+ } else {
+ // check the first iterator -- if it starts not from the begin of the block, we have to adjust end of the
+ // first block to match begin iterator and switch to next block
+ if (begin.PointsToChunkMiddle()) {
+ addBlock(begin.GetChunk(), begin.Ptr, begin->End);
+ begin->End = begin.Ptr;
+ begin.AdvanceToNextContiguousBlock();
+ }
+
+ // now drop full blocks
+ size_t rangeSize = 0;
+ for (auto it = begin.Iter; it != end.Iter; ++it) {
+ Y_VERIFY_DEBUG(it->GetSize());
+ rangeSize += it->GetSize();
+ }
+ if (rangeSize) {
+ if (target) {
+ end.Iter = target->Chain.Splice(target->Chain.end(), Chain, begin.Iter, end.Iter);
+ target->Size += rangeSize;
+ } else {
+ end.Iter = Chain.Erase(begin.Iter, end.Iter);
+ }
+ Size -= rangeSize;
+ }
+
+ // and cut the last block if necessary
+ if (end.PointsToChunkMiddle()) {
+ addBlock(end.GetChunk(), end->Begin, end.Ptr);
+ end->Begin = end.Ptr;
+ }
+ }
+
+ InvalidateIterators();
+ }
+};
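+
+// A minimal usage sketch (illustrative): build a rope from strings, cut a slice
+// out of it, and materialize the result as a flat string.
+//
+//   TRope rope(TString("Hello, "));
+//   rope.Insert(rope.End(), TRope(TString("world")));
+//   TRope slice = rope.Extract(rope.Position(0), rope.Position(5)); // "Hello"
+//   TString flat = slice.ConvertToString();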
+
+class TRopeArena {
+ using TAllocateCallback = std::function<TIntrusivePtr<IRopeChunkBackend>()>;
+
+ TAllocateCallback Allocator;
+ TRope Arena;
+ size_t Size = 0;
+ THashSet<const void*> AccountedBuffers;
+
+public:
+ TRopeArena(TAllocateCallback&& allocator)
+ : Allocator(std::move(allocator))
+ {}
+
+ TRope CreateRope(const void *buffer, size_t len) {
+ TRope res;
+
+ while (len) {
+ if (Arena) {
+ auto iter = Arena.Begin();
+ Y_VERIFY_DEBUG(iter.Valid());
+ char *dest = const_cast<char*>(iter.ContiguousData());
+ const size_t bytesToCopy = std::min(len, iter.ContiguousSize());
+ memcpy(dest, buffer, bytesToCopy);
+ buffer = static_cast<const char*>(buffer) + bytesToCopy;
+ len -= bytesToCopy;
+ res.Insert(res.End(), Arena.Extract(Arena.Begin(), Arena.Position(bytesToCopy)));
+ } else {
+ Arena.Insert(Arena.End(), TRope(Allocator()));
+ }
+ }
+
+ // keep the arena size 8-byte aligned by trimming the leftover front bytes
+ const size_t align = 8;
+ if (const size_t padding = Arena.GetSize() % align) {
+ Arena.EraseFront(padding);
+ }
+
+ return res;
+ }
+
+ size_t GetSize() const {
+ return Size;
+ }
+
+ void AccountChunk(const TRope::TChunk& chunk) {
+ if (AccountedBuffers.insert(chunk.Backend.UniqueId()).second) {
+ Size += chunk.GetCapacity();
+ }
+ }
+};
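+
+// A minimal usage sketch (illustrative): an arena backed by 4 KiB aligned
+// buffers; CreateRope copies caller data into arena-owned chunks.
+//
+//   TRopeArena arena([] {
+//       return TIntrusivePtr<IRopeChunkBackend>(TRopeAlignedBuffer::Allocate(4096));
+//   });
+//   TRope copy = arena.CreateRope(data, len); // data/len supplied by the caller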
+
+struct TRopeUtils {
+ static void Memset(TRope::TConstIterator dst, char c, size_t size) {
+ while (size) {
+ Y_VERIFY_DEBUG(dst.Valid());
+ size_t len = std::min(size, dst.ContiguousSize());
+ memset(const_cast<char*>(dst.ContiguousData()), c, len);
+ dst += len;
+ size -= len;
+ }
+ }
+
+ static void Memcpy(TRope::TConstIterator dst, TRope::TConstIterator src, size_t size) {
+ while (size) {
+ Y_VERIFY_DEBUG(dst.Valid() && src.Valid(),
+ "Invalid iterator in memcpy: dst.Valid() - %" PRIu32 ", src.Valid() - %" PRIu32,
+ (ui32)dst.Valid(), (ui32)src.Valid());
+ size_t len = std::min(size, std::min(dst.ContiguousSize(), src.ContiguousSize()));
+ memcpy(const_cast<char*>(dst.ContiguousData()), src.ContiguousData(), len);
+ dst += len;
+ src += len;
+ size -= len;
+ }
+ }
+
+ static void Memcpy(TRope::TConstIterator dst, const char* src, size_t size) {
+ while (size) {
+ Y_VERIFY_DEBUG(dst.Valid());
+ size_t len = std::min(size, dst.ContiguousSize());
+ memcpy(const_cast<char*>(dst.ContiguousData()), src, len);
+ size -= len;
+ dst += len;
+ src += len;
+ }
+ }
+
+ static void Memcpy(char* dst, TRope::TConstIterator src, size_t size) {
+ while (size) {
+ Y_VERIFY_DEBUG(src.Valid());
+ size_t len = std::min(size, src.ContiguousSize());
+ memcpy(dst, src.ContiguousData(), len);
+ size -= len;
+ dst += len;
+ src += len;
+ }
+ }
+
+ // copy at most sizeBound bytes while src remains valid; returns the number of bytes copied
+ static size_t SafeMemcpy(char* dst, TRope::TIterator src, size_t sizeBound) {
+ size_t origSize = sizeBound;
+ while (sizeBound && src.Valid()) {
+ size_t len = Min(sizeBound, src.ContiguousSize());
+ memcpy(dst, src.ContiguousData(), len);
+ sizeBound -= len;
+ dst += len;
+ src += len;
+ }
+ return origSize - sizeBound;
+ }
+};
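+
+// A minimal usage sketch (illustrative): zero a prefix of a rope in place, then
+// copy it back out into a flat buffer.
+//
+//   TRopeUtils::Memset(rope.Begin(), 0, 16);
+//   char buf[16];
+//   TRopeUtils::Memcpy(buf, rope.Begin(), sizeof(buf));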
+
+template<size_t BLOCK, size_t ALIGN = 16>
+class TRopeSlideView {
+ alignas(ALIGN) char Slide[BLOCK]; // used when fewer than BLOCK contiguous bytes remain in the current chunk
+ TRope::TIterator Position; // current position in the rope
+ size_t Size;
+ char* Head; // points to the data: either the current rope chunk or Slide
+
+private:
+ void FillBlock() {
+ size_t chunkSize = Position.ContiguousSize();
+ if (chunkSize >= BLOCK) {
+ Size = chunkSize;
+ Head = const_cast<char*>(Position.ContiguousData());
+ } else {
+ Size = TRopeUtils::SafeMemcpy(Slide, Position, BLOCK);
+ Head = Slide;
+ }
+ }
+
+public:
+ TRopeSlideView(TRope::TIterator position)
+ : Position(position)
+ {
+ FillBlock();
+ }
+
+ TRopeSlideView(TRope &rope)
+ : TRopeSlideView(rope.Begin())
+ {}
+
+ // if the view is backed by Slide, copy its contents back into the rope
+ void FlushBlock() {
+ if (Head == Slide) {
+ TRopeUtils::Memcpy(Position, Head, Size);
+ }
+ }
+
+ TRope::TIterator operator+=(size_t amount) {
+ Position += amount;
+ FillBlock();
+ return Position;
+ }
+
+ TRope::TIterator GetPosition() const {
+ return Position;
+ }
+
+ char* GetHead() const {
+ return Head;
+ }
+
+ ui8* GetUi8Head() const {
+ return reinterpret_cast<ui8*>(Head);
+ }
+
+ size_t ContiguousSize() const {
+ return Size;
+ }
+
+ bool IsOnChunk() const {
+ return Head != Slide;
+ }
+};
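+
+// A minimal usage sketch (illustrative; ProcessBlock is hypothetical): walk a
+// rope in 16-byte windows that are always contiguous in memory.
+//
+//   TRopeSlideView<16> view(rope);
+//   ProcessBlock(view.GetUi8Head()); // consumes 16 flat bytes
+//   view.FlushBlock();               // write back if the Slide copy was used
+//   view += 16;                      // advance to the next window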
+
+inline TRope TRope::CopySpaceOptimized(TRope&& origin, size_t worstRatioPer1k, TRopeArena& arena) {
+ TRope res;
+ for (TChunk& chunk : origin.Chain) {
+ size_t ratio = chunk.GetSize() * 1024 / chunk.GetCapacity();
+ if (ratio < 1024 - worstRatioPer1k) {
+ res.Insert(res.End(), arena.CreateRope(chunk.Begin, chunk.GetSize()));
+ } else {
+ res.Chain.PutToEnd(std::move(chunk));
+ }
+ }
+ res.Size = origin.Size;
+ origin = TRope();
+ for (const TChunk& chunk : res.Chain) {
+ arena.AccountChunk(chunk);
+ }
+ return res;
+}
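+
+// A worked example of the ratio check above (illustrative): with
+// worstRatioPer1k = 256, a chunk is re-copied through the arena when
+// size * 1024 / capacity < 768, i.e. when it uses less than 75% of its
+// backing buffer; denser chunks are moved into the result as-is.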
+
+
+#if defined(WITH_VALGRIND) || defined(_msan_enabled_)
+
+inline void CheckRopeIsDefined(TRope::TConstIterator begin, ui64 size) {
+ while (size) {
+ ui64 contiguousSize = Min(size, begin.ContiguousSize());
+# if defined(WITH_VALGRIND)
+ VALGRIND_CHECK_MEM_IS_DEFINED(begin.ContiguousData(), contiguousSize);
+# endif
+# if defined(_msan_enabled_)
+ NSan::CheckMemIsInitialized(begin.ContiguousData(), contiguousSize);
+# endif
+ size -= contiguousSize;
+ begin += contiguousSize;
+ }
+}
+
+# define CHECK_ROPE_IS_DEFINED(begin, size) CheckRopeIsDefined(begin, size)
+
+#else
+
+# define CHECK_ROPE_IS_DEFINED(begin, size) do {} while (false)
+
+#endif
diff --git a/library/cpp/actors/util/rope_cont_deque.h b/library/cpp/actors/util/rope_cont_deque.h
new file mode 100644
index 0000000000..d1d122c49c
--- /dev/null
+++ b/library/cpp/actors/util/rope_cont_deque.h
@@ -0,0 +1,181 @@
+#pragma once
+
+#include <library/cpp/containers/stack_vector/stack_vec.h>
+#include <deque>
+
+namespace NRopeDetails {
+
+template<typename TChunk>
+class TChunkList {
+ std::deque<TChunk> Chunks;
+
+ static constexpr size_t MaxInplaceItems = 4;
+ using TInplace = TStackVec<TChunk, MaxInplaceItems>;
+ TInplace Inplace;
+
+private:
+ template<typename TChunksIt, typename TInplaceIt, typename TValue>
+ struct TIterator {
+ TChunksIt ChunksIt;
+ TInplaceIt InplaceIt;
+
+ TIterator() = default;
+
+ TIterator(TChunksIt chunksIt, TInplaceIt inplaceIt)
+ : ChunksIt(std::move(chunksIt))
+ , InplaceIt(inplaceIt)
+ {}
+
+ template<typename A, typename B, typename C>
+ TIterator(const TIterator<A, B, C>& other)
+ : ChunksIt(other.ChunksIt)
+ , InplaceIt(other.InplaceIt)
+ {}
+
+ TIterator(const TIterator&) = default;
+ TIterator(TIterator&&) = default;
+ TIterator& operator =(const TIterator&) = default;
+ TIterator& operator =(TIterator&&) = default;
+
+ TValue& operator *() const { return InplaceIt != TInplaceIt() ? *InplaceIt : *ChunksIt; }
+ TValue* operator ->() const { return InplaceIt != TInplaceIt() ? &*InplaceIt : &*ChunksIt; }
+
+ TIterator& operator ++() {
+ if (InplaceIt != TInplaceIt()) {
+ ++InplaceIt;
+ } else {
+ ++ChunksIt;
+ }
+ return *this;
+ }
+
+ TIterator& operator --() {
+ if (InplaceIt != TInplaceIt()) {
+ --InplaceIt;
+ } else {
+ --ChunksIt;
+ }
+ return *this;
+ }
+
+ template<typename A, typename B, typename C>
+ bool operator ==(const TIterator<A, B, C>& other) const {
+ return ChunksIt == other.ChunksIt && InplaceIt == other.InplaceIt;
+ }
+
+ template<typename A, typename B, typename C>
+ bool operator !=(const TIterator<A, B, C>& other) const {
+ return ChunksIt != other.ChunksIt || InplaceIt != other.InplaceIt;
+ }
+ };
+
+public:
+ using iterator = TIterator<typename std::deque<TChunk>::iterator, typename TInplace::iterator, TChunk>;
+ using const_iterator = TIterator<typename std::deque<TChunk>::const_iterator, typename TInplace::const_iterator, const TChunk>;
+
+public:
+ TChunkList() = default;
+ TChunkList(const TChunkList& other) = default;
+ TChunkList(TChunkList&& other) = default;
+ TChunkList& operator=(const TChunkList& other) = default;
+ TChunkList& operator=(TChunkList&& other) = default;
+
+ template<typename... TArgs>
+ void PutToEnd(TArgs&&... args) {
+ InsertBefore(end(), std::forward<TArgs>(args)...);
+ }
+
+ template<typename... TArgs>
+ iterator InsertBefore(iterator pos, TArgs&&... args) {
+ if (!Inplace) {
+ pos.InplaceIt = Inplace.end();
+ }
+ if (Chunks.empty() && Inplace.size() < MaxInplaceItems) {
+ return {{}, Inplace.emplace(pos.InplaceIt, std::forward<TArgs>(args)...)};
+ } else {
+ if (Inplace) {
+ Y_VERIFY_DEBUG(Chunks.empty());
+ for (auto& item : Inplace) {
+ Chunks.push_back(std::move(item));
+ }
+ pos.ChunksIt = pos.InplaceIt - Inplace.begin() + Chunks.begin();
+ Inplace.clear();
+ }
+ return {Chunks.emplace(pos.ChunksIt, std::forward<TArgs>(args)...), {}};
+ }
+ }
+
+ iterator Erase(iterator pos) {
+ if (Inplace) {
+ return {{}, Inplace.erase(pos.InplaceIt)};
+ } else {
+ return {Chunks.erase(pos.ChunksIt), {}};
+ }
+ }
+
+ iterator Erase(iterator first, iterator last) {
+ if (Inplace) {
+ return {{}, Inplace.erase(first.InplaceIt, last.InplaceIt)};
+ } else {
+ return {Chunks.erase(first.ChunksIt, last.ChunksIt), {}};
+ }
+ }
+
+ void EraseFront() {
+ if (Inplace) {
+ Inplace.erase(Inplace.begin());
+ } else {
+ Chunks.pop_front();
+ }
+ }
+
+ void EraseBack() {
+ if (Inplace) {
+ Inplace.pop_back();
+ } else {
+ Chunks.pop_back();
+ }
+ }
+
+ iterator Splice(iterator pos, TChunkList& from, iterator first, iterator last) {
+ if (!Inplace) {
+ pos.InplaceIt = Inplace.end();
+ }
+ size_t n = 0;
+ for (auto it = first; it != last; ++it, ++n)
+ {}
+ if (Chunks.empty() && Inplace.size() + n <= MaxInplaceItems) {
+ if (first.InplaceIt != typename TInplace::iterator()) {
+ Inplace.insert(pos.InplaceIt, first.InplaceIt, last.InplaceIt);
+ } else {
+ Inplace.insert(pos.InplaceIt, first.ChunksIt, last.ChunksIt);
+ }
+ } else {
+ if (Inplace) {
+ Y_VERIFY_DEBUG(Chunks.empty());
+ for (auto& item : Inplace) {
+ Chunks.push_back(std::move(item));
+ }
+ pos.ChunksIt = pos.InplaceIt - Inplace.begin() + Chunks.begin();
+ Inplace.clear();
+ }
+ if (first.InplaceIt != typename TInplace::iterator()) {
+ Chunks.insert(pos.ChunksIt, first.InplaceIt, last.InplaceIt);
+ } else {
+ Chunks.insert(pos.ChunksIt, first.ChunksIt, last.ChunksIt);
+ }
+ }
+ return from.Erase(first, last);
+ }
+
+ operator bool() const { return !Inplace.empty() || !Chunks.empty(); }
+ TChunk& GetFirstChunk() { return Inplace ? Inplace.front() : Chunks.front(); }
+ const TChunk& GetFirstChunk() const { return Inplace ? Inplace.front() : Chunks.front(); }
+ TChunk& GetLastChunk() { return Inplace ? Inplace.back() : Chunks.back(); }
+ iterator begin() { return {Chunks.begin(), Inplace ? Inplace.begin() : typename TInplace::iterator()}; }
+ const_iterator begin() const { return {Chunks.begin(), Inplace ? Inplace.begin() : typename TInplace::const_iterator()}; }
+ iterator end() { return {Chunks.end(), Inplace ? Inplace.end() : typename TInplace::iterator()}; }
+ const_iterator end() const { return {Chunks.end(), Inplace ? Inplace.end() : typename TInplace::const_iterator()}; }
+};
+
+} // NRopeDetails
diff --git a/library/cpp/actors/util/rope_cont_list.h b/library/cpp/actors/util/rope_cont_list.h
new file mode 100644
index 0000000000..18c136284e
--- /dev/null
+++ b/library/cpp/actors/util/rope_cont_list.h
@@ -0,0 +1,159 @@
+#pragma once
+
+#include <util/generic/intrlist.h>
+
+namespace NRopeDetails {
+
+template<typename TChunk>
+class TChunkList {
+ struct TItem : TIntrusiveListItem<TItem>, TChunk {
+ // delegating constructor
+ template<typename... TArgs> TItem(TArgs&&... args) : TChunk(std::forward<TArgs>(args)...) {}
+ };
+
+ using TList = TIntrusiveList<TItem>;
+ TList List;
+
+ static constexpr size_t NumInplaceItems = 2;
+ char InplaceItems[sizeof(TItem) * NumInplaceItems];
+
+ template<typename... TArgs>
+ TItem *AllocateItem(TArgs&&... args) {
+ for (size_t index = 0; index < NumInplaceItems; ++index) {
+ TItem *chunk = GetInplaceItemPtr(index);
+ if (!TItem::IsInUse(*chunk)) {
+ return new(chunk) TItem(std::forward<TArgs>(args)...);
+ }
+ }
+ return new TItem(std::forward<TArgs>(args)...);
+ }
+
+ void ReleaseItem(TItem *chunk) {
+ if (IsInplaceItem(chunk)) {
+ chunk->~TItem();
+ TItem::Clear(*chunk);
+ } else {
+ delete chunk;
+ }
+ }
+
+ void ReleaseItems(TList& list) {
+ while (list) {
+ ReleaseItem(list.Front());
+ }
+ }
+
+ void Prepare() {
+ for (size_t index = 0; index < NumInplaceItems; ++index) {
+ TItem::Clear(*GetInplaceItemPtr(index));
+ }
+ }
+
+ TItem *GetInplaceItemPtr(size_t index) { return reinterpret_cast<TItem*>(InplaceItems + index * sizeof(TItem)); }
+ bool IsInplaceItem(TItem *chunk) { return chunk >= GetInplaceItemPtr(0) && chunk < GetInplaceItemPtr(NumInplaceItems); }
+
+public:
+ using iterator = typename TList::iterator;
+ using const_iterator = typename TList::const_iterator;
+
+public:
+ TChunkList() {
+ Prepare();
+ }
+
+ ~TChunkList() {
+ ReleaseItems(List);
+#ifndef NDEBUG
+ for (size_t index = 0; index < NumInplaceItems; ++index) {
+ Y_VERIFY(!TItem::IsInUse(*GetInplaceItemPtr(index)));
+ }
+#endif
+ }
+
+ TChunkList(const TChunkList& other) {
+ Prepare();
+ for (const TItem& chunk : other.List) {
+ PutToEnd(TChunk(chunk));
+ }
+ }
+
+ TChunkList(TChunkList&& other) {
+ Prepare();
+ Splice(end(), other, other.begin(), other.end());
+ }
+
+ TChunkList& operator=(const TChunkList& other) {
+ if (this != &other) {
+ ReleaseItems(List);
+ for (const TItem& chunk : other.List) {
+ PutToEnd(TChunk(chunk));
+ }
+ }
+ return *this;
+ }
+
+ TChunkList& operator=(TChunkList&& other) {
+ if (this != &other) {
+ ReleaseItems(List);
+ Splice(end(), other, other.begin(), other.end());
+ }
+ return *this;
+ }
+
+ template<typename... TArgs>
+ void PutToEnd(TArgs&&... args) {
+ InsertBefore(end(), std::forward<TArgs>(args)...);
+ }
+
+ template<typename... TArgs>
+ iterator InsertBefore(iterator pos, TArgs&&... args) {
+ TItem *item = AllocateItem<TArgs...>(std::forward<TArgs>(args)...);
+ item->LinkBefore(pos.Item());
+ return item;
+ }
+
+ iterator Erase(iterator pos) {
+ ReleaseItem(&*pos++);
+ return pos;
+ }
+
+ iterator Erase(iterator first, iterator last) {
+ TList temp;
+ TList::Cut(first, last, temp.end());
+ ReleaseItems(temp);
+ return last;
+ }
+
+ void EraseFront() {
+ ReleaseItem(List.PopFront());
+ }
+
+ void EraseBack() {
+ ReleaseItem(List.PopBack());
+ }
+
+ iterator Splice(iterator pos, TChunkList& from, iterator first, iterator last) {
+ for (auto it = first; it != last; ) {
+ if (from.IsInplaceItem(&*it)) {
+ TList::Cut(first, it, pos);
+ InsertBefore(pos, std::move(*it));
+ it = first = from.Erase(it);
+ } else {
+ ++it;
+ }
+ }
+ TList::Cut(first, last, pos);
+ return last;
+ }
+
+ operator bool() const { return static_cast<bool>(List); }
+ TChunk& GetFirstChunk() { return *List.Front(); }
+ const TChunk& GetFirstChunk() const { return *List.Front(); }
+ TChunk& GetLastChunk() { return *List.Back(); }
+ iterator begin() { return List.begin(); }
+ const_iterator begin() const { return List.begin(); }
+ iterator end() { return List.end(); }
+ const_iterator end() const { return List.end(); }
+};
+
+} // NRopeDetails
diff --git a/library/cpp/actors/util/rope_ut.cpp b/library/cpp/actors/util/rope_ut.cpp
new file mode 100644
index 0000000000..cabeed9230
--- /dev/null
+++ b/library/cpp/actors/util/rope_ut.cpp
@@ -0,0 +1,231 @@
+#include "rope.h"
+#include <library/cpp/testing/unittest/registar.h>
+#include <util/random/random.h>
+
+class TRopeStringBackend : public IRopeChunkBackend {
+ TString Buffer;
+
+public:
+ TRopeStringBackend(TString buffer)
+ : Buffer(std::move(buffer))
+ {}
+
+ TData GetData() const override {
+ return {Buffer.data(), Buffer.size()};
+ }
+
+ size_t GetCapacity() const override {
+ return Buffer.capacity();
+ }
+};
+
+TRope CreateRope(TString s, size_t sliceSize) {
+ TRope res;
+ for (size_t i = 0; i < s.size(); ) {
+ size_t len = std::min(sliceSize, s.size() - i);
+ if (i % 2) {
+ res.Insert(res.End(), TRope(MakeIntrusive<TRopeStringBackend>(s.substr(i, len))));
+ } else {
+ res.Insert(res.End(), TRope(s.substr(i, len)));
+ }
+ i += len;
+ }
+ return res;
+}
+
+TString RopeToString(const TRope& rope) {
+ TString res;
+ auto iter = rope.Begin();
+ while (iter != rope.End()) {
+ res.append(iter.ContiguousData(), iter.ContiguousSize());
+ iter.AdvanceToNextContiguousBlock();
+ }
+
+ UNIT_ASSERT_VALUES_EQUAL(rope.GetSize(), res.size());
+
+ TString temp = TString::Uninitialized(rope.GetSize());
+ rope.Begin().ExtractPlainDataAndAdvance(temp.Detach(), temp.size());
+ UNIT_ASSERT_VALUES_EQUAL(temp, res);
+
+ return res;
+}
+
+TString Text = "No elements are copied or moved, only the internal pointers of the list nodes are re-pointed.";
+
+Y_UNIT_TEST_SUITE(TRope) {
+
+ Y_UNIT_TEST(Leak) {
+ const size_t begin = 10, end = 20;
+ TRope rope = CreateRope(Text, 10);
+ rope.Erase(rope.Begin() + begin, rope.Begin() + end);
+ }
+
+ Y_UNIT_TEST(BasicRange) {
+ TRope rope = CreateRope(Text, 10);
+ for (size_t begin = 0; begin < Text.size(); ++begin) {
+ for (size_t end = begin; end <= Text.size(); ++end) {
+ TRope::TIterator rBegin = rope.Begin() + begin;
+ TRope::TIterator rEnd = rope.Begin() + end;
+ UNIT_ASSERT_VALUES_EQUAL(RopeToString(TRope(rBegin, rEnd)), Text.substr(begin, end - begin));
+ }
+ }
+ }
+
+ Y_UNIT_TEST(Erase) {
+ for (size_t begin = 0; begin < Text.size(); ++begin) {
+ for (size_t end = begin; end <= Text.size(); ++end) {
+ TRope rope = CreateRope(Text, 10);
+ rope.Erase(rope.Begin() + begin, rope.Begin() + end);
+ TString text = Text;
+ text.erase(text.begin() + begin, text.begin() + end);
+ UNIT_ASSERT_VALUES_EQUAL(RopeToString(rope), text);
+ }
+ }
+ }
+
+ Y_UNIT_TEST(Insert) {
+ TRope rope = CreateRope(Text, 10);
+ for (size_t begin = 0; begin < Text.size(); ++begin) {
+ for (size_t end = begin; end <= Text.size(); ++end) {
+ TRope part = TRope(rope.Begin() + begin, rope.Begin() + end);
+ for (size_t where = 0; where <= Text.size(); ++where) {
+ TRope x(rope);
+ x.Insert(x.Begin() + where, TRope(part));
+ UNIT_ASSERT_VALUES_EQUAL(x.GetSize(), rope.GetSize() + part.GetSize());
+ TString text = Text;
+ text.insert(text.begin() + where, Text.begin() + begin, Text.begin() + end);
+ UNIT_ASSERT_VALUES_EQUAL(RopeToString(x), text);
+ }
+ }
+ }
+ }
+
+ Y_UNIT_TEST(Extract) {
+ for (size_t begin = 0; begin < Text.size(); ++begin) {
+ for (size_t end = begin; end <= Text.size(); ++end) {
+ TRope rope = CreateRope(Text, 10);
+ TRope part = rope.Extract(rope.Begin() + begin, rope.Begin() + end);
+ TString text = Text;
+ text.erase(text.begin() + begin, text.begin() + end);
+ UNIT_ASSERT_VALUES_EQUAL(RopeToString(rope), text);
+ UNIT_ASSERT_VALUES_EQUAL(RopeToString(part), Text.substr(begin, end - begin));
+ }
+ }
+ }
+
+ Y_UNIT_TEST(EraseFront) {
+ for (size_t pos = 0; pos <= Text.size(); ++pos) {
+ TRope rope = CreateRope(Text, 10);
+ rope.EraseFront(pos);
+ UNIT_ASSERT_VALUES_EQUAL(RopeToString(rope), Text.substr(pos));
+ }
+ }
+
+ Y_UNIT_TEST(EraseBack) {
+ for (size_t pos = 0; pos <= Text.size(); ++pos) {
+ TRope rope = CreateRope(Text, 10);
+ rope.EraseBack(pos);
+ UNIT_ASSERT_VALUES_EQUAL(RopeToString(rope), Text.substr(0, Text.size() - pos));
+ }
+ }
+
+ Y_UNIT_TEST(ExtractFront) {
+ for (size_t step = 1; step <= Text.size(); ++step) {
+ TRope rope = CreateRope(Text, 10);
+ TRope out;
+ while (const size_t len = Min(step, rope.GetSize())) {
+ rope.ExtractFront(len, &out);
+ UNIT_ASSERT(rope.GetSize() + out.GetSize() == Text.size());
+ UNIT_ASSERT_VALUES_EQUAL(RopeToString(out), Text.substr(0, out.GetSize()));
+ }
+ }
+ }
+
+ Y_UNIT_TEST(ExtractFrontPlain) {
+ for (size_t step = 1; step <= Text.size(); ++step) {
+ TRope rope = CreateRope(Text, 10);
+ TString buffer = Text;
+ auto it = rope.Begin();
+ size_t remain = rope.GetSize();
+ while (const size_t len = Min(step, remain)) {
+ TString data = TString::Uninitialized(len);
+ it.ExtractPlainDataAndAdvance(data.Detach(), data.size());
+ UNIT_ASSERT_VALUES_EQUAL(data, buffer.substr(0, len));
+ UNIT_ASSERT_VALUES_EQUAL(RopeToString(TRope(it, rope.End())), buffer.substr(len));
+ buffer = buffer.substr(len);
+ remain -= len;
+ }
+ }
+ }
+
+ Y_UNIT_TEST(FetchFrontPlain) {
+ char s[10];
+ char *data = s;
+ size_t remain = sizeof(s);
+ TRope rope = TRope(TString("HELLO"));
+ UNIT_ASSERT(!rope.FetchFrontPlain(&data, &remain));
+ UNIT_ASSERT(!rope);
+ rope.Insert(rope.End(), TRope(TString("WORLD!!!")));
+ UNIT_ASSERT(rope.FetchFrontPlain(&data, &remain));
+ UNIT_ASSERT(!remain);
+ UNIT_ASSERT(rope.GetSize() == 3);
+ UNIT_ASSERT_VALUES_EQUAL(rope.ConvertToString(), "!!!");
+ UNIT_ASSERT(!strncmp(s, "HELLOWORLD", 10));
+ }
+
+ Y_UNIT_TEST(Glueing) {
+ TRope rope = CreateRope(Text, 10);
+ for (size_t begin = 0; begin <= Text.size(); ++begin) {
+ for (size_t end = begin; end <= Text.size(); ++end) {
+ TString repr = rope.DebugString();
+ TRope temp = rope.Extract(rope.Position(begin), rope.Position(end));
+ rope.Insert(rope.Position(begin), std::move(temp));
+ UNIT_ASSERT_VALUES_EQUAL(repr, rope.DebugString());
+ UNIT_ASSERT_VALUES_EQUAL(RopeToString(rope), Text);
+ }
+ }
+ }
+
+ Y_UNIT_TEST(IterWalk) {
+ TRope rope = CreateRope(Text, 10);
+ for (size_t step1 = 0; step1 <= rope.GetSize(); ++step1) {
+ for (size_t step2 = 0; step2 <= step1; ++step2) {
+ TRope::TConstIterator iter = rope.Begin();
+ iter += step1;
+ iter -= step2;
+ UNIT_ASSERT(iter == rope.Position(step1 - step2));
+ }
+ }
+ }
+
+ Y_UNIT_TEST(Compare) {
+ auto check = [](const TString& x, const TString& y) {
+ const TRope xRope = CreateRope(x, 7);
+ const TRope yRope = CreateRope(y, 11);
+ UNIT_ASSERT_VALUES_EQUAL(xRope == yRope, x == y);
+ UNIT_ASSERT_VALUES_EQUAL(xRope != yRope, x != y);
+ UNIT_ASSERT_VALUES_EQUAL(xRope < yRope, x < y);
+ UNIT_ASSERT_VALUES_EQUAL(xRope <= yRope, x <= y);
+ UNIT_ASSERT_VALUES_EQUAL(xRope > yRope, x > y);
+ UNIT_ASSERT_VALUES_EQUAL(xRope >= yRope, x >= y);
+ };
+
+ TVector<TString> pool;
+ for (size_t k = 0; k < 10; ++k) {
+ size_t len = RandomNumber<size_t>(100) + 100;
+ TString s = TString::Uninitialized(len);
+ char *p = s.Detach();
+ for (size_t j = 0; j < len; ++j) {
+ *p++ = RandomNumber<unsigned char>();
+ }
+ pool.push_back(std::move(s));
+ }
+
+ for (const TString& x : pool) {
+ for (const TString& y : pool) {
+ check(x, y);
+ }
+ }
+ }
+
+}
diff --git a/library/cpp/actors/util/should_continue.cpp b/library/cpp/actors/util/should_continue.cpp
new file mode 100644
index 0000000000..258e6a0aff
--- /dev/null
+++ b/library/cpp/actors/util/should_continue.cpp
@@ -0,0 +1,23 @@
+#include "should_continue.h"
+
+void TProgramShouldContinue::ShouldRestart() {
+ AtomicSet(State, Restart);
+}
+
+void TProgramShouldContinue::ShouldStop(int returnCode) {
+ AtomicSet(ReturnCode, returnCode);
+ AtomicSet(State, Stop);
+}
+
+TProgramShouldContinue::EState TProgramShouldContinue::PollState() {
+ return static_cast<EState>(AtomicGet(State));
+}
+
+int TProgramShouldContinue::GetReturnCode() {
+ return static_cast<int>(AtomicGet(ReturnCode));
+}
+
+void TProgramShouldContinue::Reset() {
+ AtomicSet(ReturnCode, 0);
+ AtomicSet(State, Continue);
+}
diff --git a/library/cpp/actors/util/should_continue.h b/library/cpp/actors/util/should_continue.h
new file mode 100644
index 0000000000..76acc40dc4
--- /dev/null
+++ b/library/cpp/actors/util/should_continue.h
@@ -0,0 +1,22 @@
+#pragma once
+#include "defs.h"
+
+class TProgramShouldContinue {
+public:
+ enum EState {
+ Continue,
+ Stop,
+ Restart,
+ };
+
+ void ShouldRestart();
+ void ShouldStop(int returnCode = 0);
+
+ EState PollState();
+ int GetReturnCode();
+
+ void Reset();
+private:
+ TAtomic ReturnCode = 0;
+ TAtomic State = Continue;
+};
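+
+// A minimal main-loop sketch (illustrative): poll the shared state until some
+// other component requests stop or restart.
+//
+//   TProgramShouldContinue shouldContinue;
+//   while (shouldContinue.PollState() == TProgramShouldContinue::Continue) {
+//       Sleep(TDuration::MilliSeconds(200));
+//   }
+//   return shouldContinue.GetReturnCode();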
diff --git a/library/cpp/actors/util/thread.h b/library/cpp/actors/util/thread.h
new file mode 100644
index 0000000000..d742c8c585
--- /dev/null
+++ b/library/cpp/actors/util/thread.h
@@ -0,0 +1,26 @@
+#pragma once
+
+#include <util/generic/strbuf.h>
+#include <util/stream/str.h>
+#include <util/system/execpath.h>
+#include <util/system/thread.h>
+#include <time.h>
+
+inline void SetCurrentThreadName(const TString& name,
+ const ui32 maxCharsFromProcessName = 8) {
+#if defined(_linux_)
+ // Linux limits thread names to 15 characters plus the terminating '\0'
+
+ TStringBuf procName(GetExecPath());
+ procName = procName.RNextTok('/');
+ procName = procName.SubStr(0, maxCharsFromProcessName);
+
+ TStringStream linuxName;
+ linuxName << procName << "." << name;
+ TThread::SetCurrentThreadName(linuxName.Str().data());
+#else
+ Y_UNUSED(maxCharsFromProcessName);
+ TThread::SetCurrentThreadName(name.data());
+#endif
+}
diff --git a/library/cpp/actors/util/threadparkpad.cpp b/library/cpp/actors/util/threadparkpad.cpp
new file mode 100644
index 0000000000..74069ff15b
--- /dev/null
+++ b/library/cpp/actors/util/threadparkpad.cpp
@@ -0,0 +1,148 @@
+#include "threadparkpad.h"
+#include <util/system/winint.h>
+
+#ifdef _linux_
+
+#include "futex.h"
+
+namespace NActors {
+ class TThreadParkPad::TImpl {
+ volatile bool Interrupted;
+ int Futex;
+
+ public:
+ TImpl()
+ : Interrupted(false)
+ , Futex(0)
+ {
+ }
+ ~TImpl() {
+ }
+
+ bool Park() noexcept {
+ __atomic_fetch_sub(&Futex, 1, __ATOMIC_SEQ_CST);
+ while (__atomic_load_n(&Futex, __ATOMIC_ACQUIRE) == -1)
+ SysFutex(&Futex, FUTEX_WAIT_PRIVATE, -1, nullptr, nullptr, 0);
+ return IsInterrupted();
+ }
+
+ void Unpark() noexcept {
+ const int old = __atomic_fetch_add(&Futex, 1, __ATOMIC_SEQ_CST);
+ if (old == -1)
+ SysFutex(&Futex, FUTEX_WAKE_PRIVATE, -1, nullptr, nullptr, 0);
+ }
+
+ void Interrupt() noexcept {
+ __atomic_store_n(&Interrupted, true, __ATOMIC_SEQ_CST);
+ Unpark();
+ }
+
+ bool IsInterrupted() const noexcept {
+ return __atomic_load_n(&Interrupted, __ATOMIC_ACQUIRE);
+ }
+ };
+
+#elif defined _win32_
+#include <util/generic/bt_exception.h>
+#include <util/generic/yexception.h>
+
+namespace NActors {
+ class TThreadParkPad::TImpl {
+ TAtomic Interrupted;
+ HANDLE EvHandle;
+
+ public:
+ TImpl()
+ : Interrupted(false)
+ {
+ EvHandle = ::CreateEvent(0, false, false, 0);
+ if (!EvHandle)
+ ythrow TWithBackTrace<yexception>() << "::CreateEvent failed";
+ }
+ ~TImpl() {
+ if (EvHandle)
+ ::CloseHandle(EvHandle);
+ }
+
+ bool Park() noexcept {
+ ::WaitForSingleObject(EvHandle, INFINITE);
+ return AtomicGet(Interrupted);
+ }
+
+ void Unpark() noexcept {
+ ::SetEvent(EvHandle);
+ }
+
+ void Interrupt() noexcept {
+ AtomicSet(Interrupted, true);
+ Unpark();
+ }
+
+ bool IsInterrupted() const noexcept {
+ return AtomicGet(Interrupted);
+ }
+ };
+
+#else
+
+#include <util/system/event.h>
+
+namespace NActors {
+ class TThreadParkPad::TImpl {
+ TAtomic Interrupted;
+ TSystemEvent Ev;
+
+ public:
+ TImpl()
+ : Interrupted(false)
+ , Ev(TSystemEvent::rAuto)
+ {
+ }
+ ~TImpl() {
+ }
+
+ bool Park() noexcept {
+ Ev.Wait();
+ return AtomicGet(Interrupted);
+ }
+
+ void Unpark() noexcept {
+ Ev.Signal();
+ }
+
+ void Interrupt() noexcept {
+ AtomicSet(Interrupted, true);
+ Unpark();
+ }
+
+ bool IsInterrupted() const noexcept {
+ return AtomicGet(Interrupted);
+ }
+ };
+#endif
+
+ TThreadParkPad::TThreadParkPad()
+ : Impl(new TThreadParkPad::TImpl())
+ {
+ }
+
+ TThreadParkPad::~TThreadParkPad() {
+ }
+
+ bool TThreadParkPad::Park() noexcept {
+ return Impl->Park();
+ }
+
+ void TThreadParkPad::Unpark() noexcept {
+ Impl->Unpark();
+ }
+
+ void TThreadParkPad::Interrupt() noexcept {
+ Impl->Interrupt();
+ }
+
+ bool TThreadParkPad::Interrupted() const noexcept {
+ return Impl->IsInterrupted();
+ }
+
+}
diff --git a/library/cpp/actors/util/threadparkpad.h b/library/cpp/actors/util/threadparkpad.h
new file mode 100644
index 0000000000..5b574ccf34
--- /dev/null
+++ b/library/cpp/actors/util/threadparkpad.h
@@ -0,0 +1,21 @@
+#pragma once
+
+#include <util/generic/ptr.h>
+
+namespace NActors {
+ class TThreadParkPad {
+ private:
+ class TImpl;
+ THolder<TImpl> Impl;
+
+ public:
+ TThreadParkPad();
+ ~TThreadParkPad();
+
+ bool Park() noexcept;
+ void Unpark() noexcept;
+ void Interrupt() noexcept;
+ bool Interrupted() const noexcept;
+ };
+
+}
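+
+// A minimal usage sketch (illustrative; DoPendingWork is hypothetical): a worker
+// parks between batches, producers wake it with Unpark() and terminate it with
+// Interrupt().
+//
+//   NActors::TThreadParkPad pad;
+//   // worker thread: Park() returns true once Interrupt() has been called
+//   while (!pad.Park()) {
+//       DoPendingWork();
+//   }
+//   // other threads:
+//   pad.Unpark();    // wake the worker for more work
+//   pad.Interrupt(); // wake the worker and ask it to exit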
diff --git a/library/cpp/actors/util/ticket_lock.h b/library/cpp/actors/util/ticket_lock.h
new file mode 100644
index 0000000000..3b1fa80393
--- /dev/null
+++ b/library/cpp/actors/util/ticket_lock.h
@@ -0,0 +1,48 @@
+#pragma once
+
+#include "intrinsics.h"
+#include <util/system/guard.h>
+#include <util/system/yassert.h>
+
+class TTicketLock : TNonCopyable {
+ ui32 TicketIn;
+ ui32 TicketOut;
+
+public:
+ TTicketLock()
+ : TicketIn(0)
+ , TicketOut(0)
+ {
+ }
+
+ void Release() noexcept {
+ AtomicUi32Increment(&TicketOut);
+ }
+
+ ui32 Acquire() noexcept {
+ ui32 revolves = 0;
+ const ui32 ticket = AtomicUi32Increment(&TicketIn) - 1;
+ while (ticket != AtomicLoad(&TicketOut)) {
+ Y_VERIFY_DEBUG(ticket >= AtomicLoad(&TicketOut));
+ SpinLockPause();
+ ++revolves;
+ }
+ return revolves;
+ }
+
+ bool TryAcquire() noexcept {
+ const ui32 x = AtomicLoad(&TicketOut);
+ if (x == AtomicLoad(&TicketIn) && AtomicUi32Cas(&TicketIn, x + 1, x))
+ return true;
+ else
+ return false;
+ }
+
+ bool IsLocked() noexcept {
+ const ui32 ticketIn = AtomicLoad(&TicketIn);
+ const ui32 ticketOut = AtomicLoad(&TicketOut);
+ return (ticketIn != ticketOut);
+ }
+
+ typedef ::TGuard<TTicketLock> TGuard;
+};
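+
+// A minimal usage sketch (illustrative): the nested TGuard typedef gives RAII
+// acquire/release over the lock; waiters enter in ticket (FIFO) order.
+//
+//   TTicketLock lock;
+//   {
+//       TTicketLock::TGuard guard(lock);
+//       // critical section
+//   } // Release() runs on scope exit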
diff --git a/library/cpp/actors/util/timerfd.h b/library/cpp/actors/util/timerfd.h
new file mode 100644
index 0000000000..3189e2a672
--- /dev/null
+++ b/library/cpp/actors/util/timerfd.h
@@ -0,0 +1,65 @@
+#pragma once
+
+#include "datetime.h"
+
+#include <util/generic/noncopyable.h>
+
+#ifdef _linux_
+
+#include <util/system/yassert.h>
+#include <errno.h>
+#include <sys/timerfd.h>
+
+struct TTimerFd: public TNonCopyable {
+ int Fd;
+
+ TTimerFd() {
+ Fd = timerfd_create(CLOCK_MONOTONIC, 0);
+ Y_VERIFY(Fd != -1, "timerfd_create(CLOCK_MONOTONIC, 0) -> -1; errno:%d: %s", int(errno), strerror(errno));
+ }
+
+ ~TTimerFd() {
+ close(Fd);
+ }
+
+ void Set(ui64 ts) {
+ ui64 now = GetCycleCountFast();
+ Arm(now >= ts? 1: NHPTimer::GetSeconds(ts - now) * 1e9);
+ }
+
+ void Reset() {
+ Arm(0); // disarm timer
+ }
+
+ void Wait() {
+ ui64 expirations;
+ ssize_t s = read(Fd, &expirations, sizeof(ui64));
+ Y_UNUSED(s); // Y_VERIFY(s == sizeof(ui64));
+ }
+
+ void Wake() {
+ Arm(1);
+ }
+private:
+ void Arm(ui64 ns) {
+ struct itimerspec spec;
+ spec.it_value.tv_sec = ns / 1'000'000'000;
+ spec.it_value.tv_nsec = ns % 1'000'000'000;
+ spec.it_interval.tv_sec = 0;
+ spec.it_interval.tv_nsec = 0;
+ int ret = timerfd_settime(Fd, 0, &spec, nullptr);
+ Y_VERIFY(ret != -1, "timerfd_settime(%d, 0, %" PRIu64 "ns, 0) -> %d; errno:%d: %s", Fd, ns, ret, int(errno), strerror(errno));
+ }
+};
+
+#else
+
+struct TTimerFd: public TNonCopyable {
+ int Fd = 0;
+ void Set(ui64) {}
+ void Reset() {}
+ void Wait() {}
+ void Wake() {}
+};
+
+#endif
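+
+// A minimal usage sketch (illustrative): arm the timer for a deadline expressed
+// in GetCycleCountFast() units and block until it fires.
+//
+//   TTimerFd timer;
+//   timer.Set(deadlineTs); // deadlineTs: target timestamp in fast cycle counts
+//   timer.Wait();          // blocks on read() until the timer expires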
diff --git a/library/cpp/actors/util/unordered_cache.h b/library/cpp/actors/util/unordered_cache.h
new file mode 100644
index 0000000000..76f036c0cf
--- /dev/null
+++ b/library/cpp/actors/util/unordered_cache.h
@@ -0,0 +1,201 @@
+#pragma once
+
+#include "defs.h"
+#include "queue_chunk.h"
+
+template <typename T, ui32 Size = 512, ui32 ConcurrencyFactor = 1, typename TChunk = TQueueChunk<T, Size>>
+class TUnorderedCache : TNonCopyable {
+ static_assert(std::is_integral<T>::value || std::is_pointer<T>::value, "expect std::is_integral<T>::value || std::is_pointer<T>::value");
+
+public:
+ static constexpr ui32 Concurrency = ConcurrencyFactor * 4;
+
+private:
+ struct TReadSlot {
+ TChunk* volatile ReadFrom;
+ volatile ui32 ReadPosition;
+ char Padding[64 - sizeof(TChunk*) - sizeof(ui32)]; // 1 slot per cache line
+ };
+
+ struct TWriteSlot {
+ TChunk* volatile WriteTo;
+ volatile ui32 WritePosition;
+ char Padding[64 - sizeof(TChunk*) - sizeof(ui32)]; // 1 slot per cache line
+ };
+
+ static_assert(sizeof(TReadSlot) == 64, "expect sizeof(TReadSlot) == 64");
+ static_assert(sizeof(TWriteSlot) == 64, "expect sizeof(TWriteSlot) == 64");
+
+private:
+ TReadSlot ReadSlots[Concurrency];
+ TWriteSlot WriteSlots[Concurrency];
+
+ static_assert(sizeof(TChunk*) == sizeof(TAtomic), "expect sizeof(TChunk*) == sizeof(TAtomic)");
+
+private:
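+ // RAII wrapper for exclusive ownership of a TWriteSlot: LockWriter() below
+ // acquires a slot by swapping its WriteTo pointer to nullptr; Drop() (or the
+ // destructor) publishes the current chunk back, releasing the slot.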
+ struct TLockedWriter {
+ TWriteSlot* Slot;
+ TChunk* WriteTo;
+
+ TLockedWriter()
+ : Slot(nullptr)
+ , WriteTo(nullptr)
+ { }
+
+ TLockedWriter(TWriteSlot* slot, TChunk* writeTo)
+ : Slot(slot)
+ , WriteTo(writeTo)
+ { }
+
+ ~TLockedWriter() noexcept {
+ Drop();
+ }
+
+ void Drop() {
+ if (Slot) {
+ AtomicStore(&Slot->WriteTo, WriteTo);
+ Slot = nullptr;
+ }
+ }
+
+ TLockedWriter(const TLockedWriter&) = delete;
+ TLockedWriter& operator=(const TLockedWriter&) = delete;
+
+ TLockedWriter(TLockedWriter&& rhs)
+ : Slot(rhs.Slot)
+ , WriteTo(rhs.WriteTo)
+ {
+ rhs.Slot = nullptr;
+ }
+
+ TLockedWriter& operator=(TLockedWriter&& rhs) {
+ if (Y_LIKELY(this != &rhs)) {
+ Drop();
+ Slot = rhs.Slot;
+ WriteTo = rhs.WriteTo;
+ rhs.Slot = nullptr;
+ }
+ return *this;
+ }
+ };
+
+private:
+ TLockedWriter LockWriter(ui64 writerRotation) {
+ ui32 cycle = 0;
+ for (;;) {
+ TWriteSlot* slot = &WriteSlots[writerRotation % Concurrency];
+ if (AtomicLoad(&slot->WriteTo) != nullptr) {
+ if (TChunk* writeTo = AtomicSwap(&slot->WriteTo, nullptr)) {
+ return TLockedWriter(slot, writeTo);
+ }
+ }
+ ++writerRotation;
+
+ // Do a spinlock pause after a full cycle
+ if (++cycle == Concurrency) {
+ SpinLockPause();
+ cycle = 0;
+ }
+ }
+ }
+
+ void WriteOne(TLockedWriter& lock, T x) {
+ Y_VERIFY_DEBUG(x != 0);
+
+ const ui32 pos = AtomicLoad(&lock.Slot->WritePosition);
+ if (pos != TChunk::EntriesCount) {
+ AtomicStore(&lock.Slot->WritePosition, pos + 1);
+ AtomicStore(&lock.WriteTo->Entries[pos], x);
+ } else {
+ TChunk* next = new TChunk();
+ AtomicStore(&next->Entries[0], x);
+ AtomicStore(&lock.Slot->WritePosition, 1u);
+ AtomicStore(&lock.WriteTo->Next, next);
+ lock.WriteTo = next;
+ }
+ }
+
+public:
+ TUnorderedCache() {
+ for (ui32 i = 0; i < Concurrency; ++i) {
+ ReadSlots[i].ReadFrom = new TChunk();
+ ReadSlots[i].ReadPosition = 0;
+
+ WriteSlots[i].WriteTo = ReadSlots[i].ReadFrom;
+ WriteSlots[i].WritePosition = 0;
+ }
+ }
+
+ ~TUnorderedCache() {
+ Y_VERIFY(!Pop(0));
+
+ for (ui64 i = 0; i < Concurrency; ++i) {
+ if (ReadSlots[i].ReadFrom) {
+ delete ReadSlots[i].ReadFrom;
+ ReadSlots[i].ReadFrom = nullptr;
+ }
+ WriteSlots[i].WriteTo = nullptr;
+ }
+ }
+
+ T Pop(ui64 readerRotation) noexcept {
+ ui64 readerIndex = readerRotation;
+ const ui64 endIndex = readerIndex + Concurrency;
+ for (; readerIndex != endIndex; ++readerIndex) {
+ TReadSlot* slot = &ReadSlots[readerIndex % Concurrency];
+ if (AtomicLoad(&slot->ReadFrom) != nullptr) {
+ if (TChunk* readFrom = AtomicSwap(&slot->ReadFrom, nullptr)) {
+ const ui32 pos = AtomicLoad(&slot->ReadPosition);
+ if (pos != TChunk::EntriesCount) {
+ if (T ret = AtomicLoad(&readFrom->Entries[pos])) {
+ AtomicStore(&slot->ReadPosition, pos + 1);
+ AtomicStore(&slot->ReadFrom, readFrom); // release lock with same chunk
+ return ret; // found, return
+ } else {
+ AtomicStore(&slot->ReadFrom, readFrom); // release lock with same chunk
+ }
+ } else if (TChunk* next = AtomicLoad(&readFrom->Next)) {
+ if (T ret = AtomicLoad(&next->Entries[0])) {
+ AtomicStore(&slot->ReadPosition, 1u);
+ AtomicStore(&slot->ReadFrom, next); // release lock with next chunk
+ delete readFrom;
+ return ret;
+ } else {
+ AtomicStore(&slot->ReadPosition, 0u);
+ AtomicStore(&slot->ReadFrom, next); // release lock with new chunk
+ delete readFrom;
+ }
+ } else {
+ // nothing in old chunk and no next chunk, just release lock with old chunk
+ AtomicStore(&slot->ReadFrom, readFrom);
+ }
+ }
+ }
+ }
+
+ return 0; // nothing found after a full cycle over all slots
+ }
+
+ void Push(T x, ui64 writerRotation) {
+ TLockedWriter lock = LockWriter(writerRotation);
+ WriteOne(lock, x);
+ }
+
+ void PushBulk(T* x, ui32 xcount, ui64 writerRotation) {
+ for (;;) {
+ // Fill no more than one queue chunk per round
+ const ui32 xround = Min(xcount, (ui32)TChunk::EntriesCount);
+
+ {
+ TLockedWriter lock = LockWriter(writerRotation++);
+ for (T* end = x + xround; x != end; ++x)
+ WriteOne(lock, *x);
+ }
+
+ if (xcount <= TChunk::EntriesCount)
+ break;
+
+ xcount -= TChunk::EntriesCount;
+ }
+ }
+};
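+
+// A minimal usage sketch (illustrative): the rotation counters only spread
+// contention across the slots, so each thread typically keeps its own
+// monotonically increasing counters, as the unit tests below do.
+//
+// TUnorderedCache<ui64> cache;
+// ui64 writeRotation = 0;
+// ui64 readRotation = 0;
+// cache.Push(42, writeRotation++);
+// ui64 x = cache.Pop(readRotation++); // 0 means no item was found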
diff --git a/library/cpp/actors/util/unordered_cache_ut.cpp b/library/cpp/actors/util/unordered_cache_ut.cpp
new file mode 100644
index 0000000000..37865f2f91
--- /dev/null
+++ b/library/cpp/actors/util/unordered_cache_ut.cpp
@@ -0,0 +1,138 @@
+#include "unordered_cache.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+#include <util/random/random.h>
+#include <util/system/hp_timer.h>
+#include <util/system/sanitizers.h>
+#include <util/system/thread.h>
+
+Y_UNIT_TEST_SUITE(UnorderedCache) {
+
+ void DoOnePushOnePop(ui64 count) {
+ TUnorderedCache<ui64> queue;
+
+ ui64 readRotation = 0;
+ ui64 writeRotation = 0;
+
+ auto popped = queue.Pop(readRotation++);
+ UNIT_ASSERT_VALUES_EQUAL(popped, 0u);
+
+ for (ui64 i = 0; i < count; ++i) {
+ queue.Push(i + 1, writeRotation++);
+ popped = queue.Pop(readRotation++);
+ UNIT_ASSERT_VALUES_EQUAL(popped, i + 1);
+
+ popped = queue.Pop(readRotation++);
+ UNIT_ASSERT_VALUES_EQUAL(popped, 0u);
+ }
+ }
+
+ Y_UNIT_TEST(OnePushOnePop) {
+ DoOnePushOnePop(1);
+ }
+
+ Y_UNIT_TEST(OnePushOnePop_Repeat1M) {
+ DoOnePushOnePop(1000000);
+ }
+
+ /**
+ * Simplified thread spawning for testing
+ */
+ class TWorkerThread : public ISimpleThread {
+ private:
+ std::function<void()> Func;
+ double Time = 0.0;
+
+ public:
+ TWorkerThread(std::function<void()> func)
+ : Func(std::move(func))
+ { }
+
+ double GetTime() const {
+ return Time;
+ }
+
+ static THolder<TWorkerThread> Spawn(std::function<void()> func) {
+ THolder<TWorkerThread> thread = MakeHolder<TWorkerThread>(std::move(func));
+ thread->Start();
+ return thread;
+ }
+
+ private:
+ void* ThreadProc() noexcept override {
+ THPTimer timer;
+ Func();
+ Time = timer.Passed();
+ return nullptr;
+ }
+ };
+
+ void DoConcurrentPushPop(size_t threads, ui64 perThreadCount) {
+ // Concurrency factor 4 yields 16 slots, enough for up to 16 threads
+ TUnorderedCache<ui64, 512, 4> queue;
+
+ auto workerFunc = [&](size_t threadIndex) {
+ ui64 readRotation = 0;
+ ui64 writeRotation = 0;
+ ui64 readsDone = 0;
+ ui64 writesDone = 0;
+ for (;;) {
+ bool canRead = readsDone < writesDone;
+ bool canWrite = writesDone < perThreadCount;
+ if (!canRead && !canWrite) {
+ break;
+ }
+ if (canRead && canWrite) {
+ // Randomly choose between read and write
+ if (RandomNumber<ui64>(2)) {
+ canRead = false;
+ } else {
+ canWrite = false;
+ }
+ }
+ if (canRead) {
+ ui64 popped = queue.Pop(readRotation++);
+ if (popped) {
+ ++readsDone;
+ }
+ }
+ if (canWrite) {
+ queue.Push(1 + writesDone * threads + threadIndex, writeRotation++);
+ ++writesDone;
+ }
+ }
+ };
+
+ TVector<THolder<TWorkerThread>> workers(threads);
+ for (size_t i = 0; i < threads; ++i) {
+ workers[i] = TWorkerThread::Spawn([workerFunc, i]() {
+ workerFunc(i);
+ });
+ }
+
+ double maxTime = 0;
+ for (size_t i = 0; i < threads; ++i) {
+ workers[i]->Join();
+ maxTime = Max(maxTime, workers[i]->GetTime());
+ }
+
+ auto popped = queue.Pop(0);
+ UNIT_ASSERT_VALUES_EQUAL(popped, 0u);
+
+ Cerr << "Concurrent with " << threads << " threads: " << maxTime << " seconds" << Endl;
+ }
+
+ void DoConcurrentPushPop_3times(size_t threads, ui64 perThreadCount) {
+ for (size_t i = 0; i < 3; ++i) {
+ DoConcurrentPushPop(threads, perThreadCount);
+ }
+ }
+
+ static constexpr ui64 PER_THREAD_COUNT = NSan::PlainOrUnderSanitizer(1000000, 100000);
+
+ Y_UNIT_TEST(ConcurrentPushPop_1thread) { DoConcurrentPushPop_3times(1, PER_THREAD_COUNT); }
+ Y_UNIT_TEST(ConcurrentPushPop_2threads) { DoConcurrentPushPop_3times(2, PER_THREAD_COUNT); }
+ Y_UNIT_TEST(ConcurrentPushPop_4threads) { DoConcurrentPushPop_3times(4, PER_THREAD_COUNT); }
+ Y_UNIT_TEST(ConcurrentPushPop_8threads) { DoConcurrentPushPop_3times(8, PER_THREAD_COUNT); }
+ Y_UNIT_TEST(ConcurrentPushPop_16threads) { DoConcurrentPushPop_3times(16, PER_THREAD_COUNT); }
+}
diff --git a/library/cpp/actors/util/ut/ya.make b/library/cpp/actors/util/ut/ya.make
new file mode 100644
index 0000000000..3b08b77984
--- /dev/null
+++ b/library/cpp/actors/util/ut/ya.make
@@ -0,0 +1,18 @@
+UNITTEST_FOR(library/cpp/actors/util)
+
+IF (WITH_VALGRIND)
+ TIMEOUT(600)
+ SIZE(MEDIUM)
+ENDIF()
+
+OWNER(
+ alexvru
+ g:kikimr
+)
+
+SRCS(
+ rope_ut.cpp
+ unordered_cache_ut.cpp
+)
+
+END()
diff --git a/library/cpp/actors/util/ya.make b/library/cpp/actors/util/ya.make
new file mode 100644
index 0000000000..37488c3962
--- /dev/null
+++ b/library/cpp/actors/util/ya.make
@@ -0,0 +1,37 @@
+LIBRARY()
+
+OWNER(
+ ddoarn
+ g:kikimr
+)
+
+SRCS(
+ affinity.cpp
+ affinity.h
+ cpumask.h
+ datetime.h
+ defs.h
+ funnel_queue.h
+ futex.h
+ intrinsics.h
+ local_process_key.h
+ named_tuple.h
+ queue_chunk.h
+ queue_oneone_inplace.h
+ recentwnd.h
+ rope.h
+ should_continue.cpp
+ should_continue.h
+ thread.h
+ threadparkpad.cpp
+ threadparkpad.h
+ ticket_lock.h
+ timerfd.h
+ unordered_cache.h
+)
+
+PEERDIR(
+ util
+)
+
+END()
diff --git a/library/cpp/actors/wilson/wilson_event.h b/library/cpp/actors/wilson/wilson_event.h
new file mode 100644
index 0000000000..7d89c33b51
--- /dev/null
+++ b/library/cpp/actors/wilson/wilson_event.h
@@ -0,0 +1,181 @@
+#pragma once
+
+#include "wilson_trace.h"
+
+#include <library/cpp/string_utils/base64/base64.h>
+
+#include <library/cpp/actors/core/log.h>
+
+namespace NWilson {
+#if !defined(_win_)
+// works only for compilers that treat C++ as ISO/IEC 14882, not as their own dialect
+
+#define __UNROLL_PARAMS_8(N, F, X, ...) \
+ F(X, N - 8) \
+ __UNROLL_PARAMS_7(N, F, ##__VA_ARGS__)
+#define __UNROLL_PARAMS_7(N, F, X, ...) \
+ F(X, N - 7) \
+ __UNROLL_PARAMS_6(N, F, ##__VA_ARGS__)
+#define __UNROLL_PARAMS_6(N, F, X, ...) \
+ F(X, N - 6) \
+ __UNROLL_PARAMS_5(N, F, ##__VA_ARGS__)
+#define __UNROLL_PARAMS_5(N, F, X, ...) \
+ F(X, N - 5) \
+ __UNROLL_PARAMS_4(N, F, ##__VA_ARGS__)
+#define __UNROLL_PARAMS_4(N, F, X, ...) \
+ F(X, N - 4) \
+ __UNROLL_PARAMS_3(N, F, ##__VA_ARGS__)
+#define __UNROLL_PARAMS_3(N, F, X, ...) \
+ F(X, N - 3) \
+ __UNROLL_PARAMS_2(N, F, ##__VA_ARGS__)
+#define __UNROLL_PARAMS_2(N, F, X, ...) \
+ F(X, N - 2) \
+ __UNROLL_PARAMS_1(N, F, ##__VA_ARGS__)
+#define __UNROLL_PARAMS_1(N, F, X) F(X, N - 1)
+#define __UNROLL_PARAMS_0(N, F)
+#define __EX(...) __VA_ARGS__
+#define __NUM_PARAMS(...) __NUM_PARAMS_SELECT_N(__VA_ARGS__, __NUM_PARAMS_SEQ)
+#define __NUM_PARAMS_SELECT_N(...) __EX(__NUM_PARAMS_SELECT(__VA_ARGS__))
+#define __NUM_PARAMS_SELECT(X, _1, _2, _3, _4, _5, _6, _7, _8, N, ...) N
+#define __NUM_PARAMS_SEQ 8, 7, 6, 5, 4, 3, 2, 1, 0, ERROR
+#define __CAT(X, Y) X##Y
+#define __UNROLL_PARAMS_N(N, F, ...) __EX(__CAT(__UNROLL_PARAMS_, N)(N, F, ##__VA_ARGS__))
+#define __UNROLL_PARAMS(F, ...) __UNROLL_PARAMS_N(__NUM_PARAMS(X, ##__VA_ARGS__), F, ##__VA_ARGS__)
+#define __EX2(F, X, INDEX) __INVOKE(F, __EX X, INDEX)
+#define __INVOKE(F, ...) F(__VA_ARGS__)
+
+#define __DECLARE_PARAM(X, INDEX) __EX2(__DECLARE_PARAM_X, X, INDEX)
+#define __DECLARE_PARAM_X(TYPE, NAME, INDEX) \
+ static const struct T##NAME##Param \
+ : ::NWilson::TParamBinder<INDEX, TYPE> { \
+ T##NAME##Param() { \
+ } \
+ using ::NWilson::TParamBinder<INDEX, TYPE>::operator=; \
+ } NAME;
+
+#define __TUPLE_PARAM(X, INDEX) __EX2(__TUPLE_PARAM_X, X, INDEX)
+#define __TUPLE_PARAM_X(TYPE, NAME, INDEX) TYPE,
+
+#define __OUTPUT_PARAM(X, INDEX) __EX2(__OUTPUT_PARAM_X, X, INDEX)
+#define __OUTPUT_PARAM_X(TYPE, NAME, INDEX) str << (INDEX ? ", " : "") << #NAME << "# " << std::get<INDEX>(ParamPack);
+
+#define __FILL_PARAM(P, INDEX) \
+ do { \
+ const auto& boundParam = (NParams::P); \
+ boundParam.Apply(event.ParamPack); \
+ } while (false);
+
+#define DECLARE_WILSON_EVENT(EVENT_NAME, ...) \
+ namespace N##EVENT_NAME##Params { \
+ __UNROLL_PARAMS(__DECLARE_PARAM, ##__VA_ARGS__) \
+ \
+ using TParamPack = std::tuple< \
+ __UNROLL_PARAMS(__TUPLE_PARAM, ##__VA_ARGS__) char>; \
+ } \
+ struct T##EVENT_NAME { \
+ using TParamPack = N##EVENT_NAME##Params::TParamPack; \
+ TParamPack ParamPack; \
+ \
+ void Output(IOutputStream& str) { \
+ str << #EVENT_NAME << "{"; \
+ __UNROLL_PARAMS(__OUTPUT_PARAM, ##__VA_ARGS__) \
+ str << "}"; \
+ } \
+ };
+
+ template <size_t INDEX, typename T>
+ class TBoundParam {
+ mutable T Value;
+
+ public:
+ TBoundParam(T&& value)
+ : Value(std::move(value))
+ {
+ }
+
+ template <typename TParamPack>
+ void Apply(TParamPack& pack) const {
+ std::get<INDEX>(pack) = std::move(Value);
+ }
+ };
+
+ template <size_t INDEX, typename T>
+ struct TParamBinder {
+ template <typename TValue>
+ TBoundParam<INDEX, T> operator=(const TValue& value) const {
+ return TBoundParam<INDEX, T>(TValue(value));
+ }
+
+ template <typename TValue>
+ TBoundParam<INDEX, T> operator=(TValue&& value) const {
+ return TBoundParam<INDEX, T>(std::move(value));
+ }
+ };
+
+// log a Wilson event: a fresh child span of TRACE_ID is generated for the logged
+// event, and TRACE_ID is advanced to that span so it becomes the parent of subsequent events
+#define WILSON_TRACE(CTX, TRACE_ID, EVENT_NAME, ...) \
+ if (::NWilson::TraceEnabled(CTX)) { \
+ ::NWilson::TTraceId* __traceId = (TRACE_ID); \
+ if (__traceId && *__traceId) { \
+ TInstant now = Now(); \
+ T##EVENT_NAME event; \
+ namespace NParams = N##EVENT_NAME##Params; \
+ __UNROLL_PARAMS(__FILL_PARAM, ##__VA_ARGS__) \
+ ::NWilson::TraceEvent((CTX), __traceId, event, now); \
+ } \
+ }
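+
+// A minimal usage sketch (SomethingHappened and its Bytes parameter are
+// hypothetical, invented for illustration):
+//
+// DECLARE_WILSON_EVENT(SomethingHappened, (ui64, Bytes));
+// ...
+// WILSON_TRACE(ctx, &traceId, SomethingHappened, Bytes = size);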
+
+ inline ui32 GetNodeId(const NActors::TActorSystem& actorSystem) {
+ return actorSystem.NodeId;
+ }
+ inline ui32 GetNodeId(const NActors::TActivationContext& ac) {
+ return GetNodeId(*ac.ExecutorThread.ActorSystem);
+ }
+
+ constexpr ui32 WilsonComponentId = 430; // kikimrservices: wilson
+
+ template <typename TActorSystem>
+ bool TraceEnabled(const TActorSystem& ctx) {
+ const auto* loggerSettings = ctx.LoggerSettings();
+ return loggerSettings && loggerSettings->Satisfies(NActors::NLog::PRI_DEBUG, WilsonComponentId);
+ }
+
+ template <typename TActorSystem, typename TEvent>
+ void TraceEvent(const TActorSystem& actorSystem, TTraceId* traceId, TEvent&& event, TInstant timestamp) {
+ // ensure that we are not using obsolete TraceId
+ traceId->CheckConsistency();
+
+ // store parent id (for logging) and generate child trace id
+ TTraceId parentTraceId(std::move(*traceId));
+ *traceId = parentTraceId.Span();
+
+ // create encoded string buffer containing timestamp
+ const ui64 timestampValue = timestamp.GetValue();
+ const size_t base64size = Base64EncodeBufSize(sizeof(timestampValue));
+ char base64[base64size];
+ char* end = Base64Encode(base64, reinterpret_cast<const ui8*>(&timestampValue), sizeof(timestampValue));
+
+ // cut trailing padding character to save some space
+ Y_VERIFY(end > base64 && end[-1] == '=');
+ --end;
+
+ // generate log record
+ TString finalMessage;
+ TStringOutput s(finalMessage);
+ s << GetNodeId(actorSystem) << " " << TStringBuf(base64, end) << " ";
+ traceId->Output(s, parentTraceId);
+ s << " ";
+ event.Output(s);
+
+ // output wilson event FIXME: special facility for wilson events w/binary serialization
+ NActors::MemLogAdapter(actorSystem, NActors::NLog::PRI_DEBUG, WilsonComponentId, std::move(finalMessage));
+ }
+
+#else
+
+#define DECLARE_WILSON_EVENT(...)
+#define WILSON_TRACE(...)
+
+#endif
+
+} // NWilson
diff --git a/library/cpp/actors/wilson/wilson_trace.h b/library/cpp/actors/wilson/wilson_trace.h
new file mode 100644
index 0000000000..3d1ca50562
--- /dev/null
+++ b/library/cpp/actors/wilson/wilson_trace.h
@@ -0,0 +1,161 @@
+#pragma once
+
+#include <library/cpp/string_utils/base64/base64.h>
+
+#include <util/stream/output.h>
+#include <util/random/random.h>
+
+#include <util/string/printf.h>
+
+namespace NWilson {
+ class TTraceId {
+ ui64 TraceId; // Random id of topmost client request
+ ui64 SpanId; // Span id of part of request currently being executed
+
+ private:
+ TTraceId(ui64 traceId, ui64 spanId)
+ : TraceId(traceId)
+ , SpanId(spanId)
+ {
+ }
+
+ static ui64 GenerateTraceId() {
+ ui64 traceId = 0;
+ while (!traceId) {
+ traceId = RandomNumber<ui64>();
+ }
+ return traceId;
+ }
+
+ static ui64 GenerateSpanId() {
+ return RandomNumber<ui64>();
+ }
+
+ public:
+ using TSerializedTraceId = char[2 * sizeof(ui64)];
+
+ public:
+ TTraceId()
+ : TraceId(0)
+ , SpanId(0)
+ {
+ }
+
+ explicit TTraceId(ui64 traceId)
+ : TraceId(traceId)
+ , SpanId(0)
+ {
+ }
+
+ TTraceId(const TSerializedTraceId& in)
+ : TraceId(reinterpret_cast<const ui64*>(in)[0])
+ , SpanId(reinterpret_cast<const ui64*>(in)[1])
+ {
+ }
+
+ // allow move semantic
+ TTraceId(TTraceId&& other)
+ : TraceId(other.TraceId)
+ , SpanId(other.SpanId)
+ {
+ other.TraceId = 0;
+ other.SpanId = 1; // explicitly mark invalid
+ }
+
+ TTraceId& operator=(TTraceId&& other) {
+ TraceId = other.TraceId;
+ SpanId = other.SpanId;
+ other.TraceId = 0;
+ other.SpanId = 1; // explicitly mark invalid
+ return *this;
+ }
+
+ // do not allow implicit copy of trace id
+ TTraceId(const TTraceId& other) = delete;
+ TTraceId& operator=(const TTraceId& other) = delete;
+
+ static TTraceId NewTraceId() {
+ return TTraceId(GenerateTraceId(), 0);
+ }
+
+ // create separate branch from this point
+ TTraceId SeparateBranch() const {
+ return Clone();
+ }
+
+ TTraceId Clone() const {
+ return TTraceId(TraceId, SpanId);
+ }
+
+ TTraceId Span() const {
+ return *this ? TTraceId(TraceId, GenerateSpanId()) : TTraceId();
+ }
+
+ ui64 GetTraceId() const {
+ return TraceId;
+ }
+
+ // Check if request tracing is enabled
+ operator bool() const {
+ return TraceId != 0;
+ }
+
+ // Output trace id into a string stream
+ void Output(IOutputStream& s, const TTraceId& parentTraceId) const {
+ union {
+ ui8 buffer[3 * sizeof(ui64)];
+ struct {
+ ui64 traceId;
+ ui64 spanId;
+ ui64 parentSpanId;
+ } x;
+ };
+
+ x.traceId = TraceId;
+ x.spanId = SpanId;
+ x.parentSpanId = parentTraceId.SpanId;
+
+ const size_t base64size = Base64EncodeBufSize(sizeof(x));
+ char base64[base64size];
+ char* end = Base64Encode(base64, buffer, sizeof(x));
+ s << TStringBuf(base64, end);
+ }
+
+ // output just span id into stream
+ void OutputSpanId(IOutputStream& s) const {
+ const size_t base64size = Base64EncodeBufSize(sizeof(SpanId));
+ char base64[base64size];
+ char* end = Base64Encode(base64, reinterpret_cast<const ui8*>(&SpanId), sizeof(SpanId));
+
+ // cut trailing padding character
+ Y_VERIFY(end > base64 && end[-1] == '=');
+ --end;
+
+ s << TStringBuf(base64, end);
+ }
+
+ void CheckConsistency() {
+ // if TraceId is zero, then SpanId must be zero too
+ Y_VERIFY_DEBUG(*this || !SpanId);
+ }
+
+ friend bool operator==(const TTraceId& x, const TTraceId& y) {
+ return x.TraceId == y.TraceId && x.SpanId == y.SpanId;
+ }
+
+ TString ToString() const {
+ return Sprintf("%" PRIu64 ":%" PRIu64, TraceId, SpanId);
+ }
+
+ bool IsFromSameTree(const TTraceId& other) const {
+ return TraceId == other.TraceId;
+ }
+
+ void Serialize(TSerializedTraceId* out) {
+ ui64* p = reinterpret_cast<ui64*>(*out);
+ p[0] = TraceId;
+ p[1] = SpanId;
+ }
+ };
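+
+ // A minimal usage sketch (illustrative): trace ids are move-only; Span()
+ // derives a child span, SeparateBranch() forks an independent copy.
+ //
+ // TTraceId root = TTraceId::NewTraceId();
+ // TTraceId child = root.Span(); // same TraceId, fresh SpanId
+ // TTraceId branch = child.SeparateBranch(); // copy for a parallel path
+ // Y_VERIFY(child.IsFromSameTree(branch));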
+
+} // NWilson
diff --git a/library/cpp/actors/wilson/ya.make b/library/cpp/actors/wilson/ya.make
new file mode 100644
index 0000000000..e371f5061d
--- /dev/null
+++ b/library/cpp/actors/wilson/ya.make
@@ -0,0 +1,14 @@
+LIBRARY()
+
+PEERDIR(
+ library/cpp/string_utils/base64
+)
+
+OWNER(alexvru)
+
+SRCS(
+ wilson_event.h
+ wilson_trace.h
+)
+
+END()
diff --git a/library/cpp/actors/ya.make b/library/cpp/actors/ya.make
new file mode 100644
index 0000000000..737c7fbc18
--- /dev/null
+++ b/library/cpp/actors/ya.make
@@ -0,0 +1,16 @@
+RECURSE_FOR_TESTS(ut)
+
+RECURSE(
+ log_backend
+ core
+ dnsresolver
+ examples
+ memory_log
+ helpers
+ prof
+ protos
+ util
+ wilson
+ testlib
+ http
+)