aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/threading/bounded_queue
diff options
context:
space:
mode:
authorkulikov <kulikov@yandex-team.com>2023-07-21 13:59:33 +0300
committerkulikov <kulikov@yandex-team.com>2023-07-21 13:59:33 +0300
commit5706cb392271ea40eab053314e7c0f4d9d4547ba (patch)
tree72bce210dc3747df1d9319fc9f56b848852d2aab /library/cpp/threading/bounded_queue
parent122a6055cef2bc785407c69b33b858a07b319e66 (diff)
downloadydb-5706cb392271ea40eab053314e7c0f4d9d4547ba.tar.gz
try to get rid of locks and allocations for elastic queue thread pool
In case of heavy load and high rps current thread pool implementation seems to have problems at least with contention on lock inside condvar (long futex wait calls from http server listener thread), so try to implement something more efficient: - replace condvar with TEventCounter implementation without internal lock (pthread condvar maintains waiters wakeup order, thread pool doesn't need it); - introduce well-known bounded mpmc queue over ring buffer; - get rid of TDecrementingWrapper; - add options to turn on new pool in library/cpp/http/server and search/daemons (will remove after adoption); - make elastic queue ut check both versions; - workaround problems with android/arm build targets.
Diffstat (limited to 'library/cpp/threading/bounded_queue')
-rw-r--r--library/cpp/threading/bounded_queue/CMakeLists.darwin-x86_64.txt14
-rw-r--r--library/cpp/threading/bounded_queue/CMakeLists.linux-aarch64.txt15
-rw-r--r--library/cpp/threading/bounded_queue/CMakeLists.linux-x86_64.txt15
-rw-r--r--library/cpp/threading/bounded_queue/CMakeLists.txt17
-rw-r--r--library/cpp/threading/bounded_queue/CMakeLists.windows-x86_64.txt14
-rw-r--r--library/cpp/threading/bounded_queue/bounded_queue.h89
-rw-r--r--library/cpp/threading/bounded_queue/bounded_queue_ut.cpp106
-rw-r--r--library/cpp/threading/bounded_queue/ut/ya.make8
-rw-r--r--library/cpp/threading/bounded_queue/ya.make9
9 files changed, 287 insertions, 0 deletions
diff --git a/library/cpp/threading/bounded_queue/CMakeLists.darwin-x86_64.txt b/library/cpp/threading/bounded_queue/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..bd9b68ab62
--- /dev/null
+++ b/library/cpp/threading/bounded_queue/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,14 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-threading-bounded_queue INTERFACE)
+target_link_libraries(cpp-threading-bounded_queue INTERFACE
+ contrib-libs-cxxsupp
+ yutil
+)
diff --git a/library/cpp/threading/bounded_queue/CMakeLists.linux-aarch64.txt b/library/cpp/threading/bounded_queue/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..613a18db10
--- /dev/null
+++ b/library/cpp/threading/bounded_queue/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,15 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-threading-bounded_queue INTERFACE)
+target_link_libraries(cpp-threading-bounded_queue INTERFACE
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+)
diff --git a/library/cpp/threading/bounded_queue/CMakeLists.linux-x86_64.txt b/library/cpp/threading/bounded_queue/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..613a18db10
--- /dev/null
+++ b/library/cpp/threading/bounded_queue/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,15 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-threading-bounded_queue INTERFACE)
+target_link_libraries(cpp-threading-bounded_queue INTERFACE
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+)
diff --git a/library/cpp/threading/bounded_queue/CMakeLists.txt b/library/cpp/threading/bounded_queue/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/library/cpp/threading/bounded_queue/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/library/cpp/threading/bounded_queue/CMakeLists.windows-x86_64.txt b/library/cpp/threading/bounded_queue/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..bd9b68ab62
--- /dev/null
+++ b/library/cpp/threading/bounded_queue/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,14 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-threading-bounded_queue INTERFACE)
+target_link_libraries(cpp-threading-bounded_queue INTERFACE
+ contrib-libs-cxxsupp
+ yutil
+)
diff --git a/library/cpp/threading/bounded_queue/bounded_queue.h b/library/cpp/threading/bounded_queue/bounded_queue.h
new file mode 100644
index 0000000000..c5c6714086
--- /dev/null
+++ b/library/cpp/threading/bounded_queue/bounded_queue.h
@@ -0,0 +1,89 @@
+#pragma once
+
+#include <util/generic/yexception.h>
+
+//https://www.1024cores.net/home/lock-free-algorithms/queues/bounded-mpmc-queue
+
+namespace NThreading {
+ template<typename T>
+ class TBoundedQueue {
+ public:
+ explicit TBoundedQueue(size_t size)
+ : Buffer_(new TCell[size])
+ , Mask_(size - 1)
+ {
+ Y_ENSURE(size >= 2 && (size & (size - 1)) == 0);
+
+ for (size_t i = 0; i < size; ++i) {
+ Buffer_[i].Sequence.store(i, std::memory_order_relaxed);
+ }
+ }
+
+ template <typename T_>
+ [[nodiscard]] bool Enqueue(T_&& data) noexcept {
+ TCell* cell;
+ size_t pos = EnqueuePos_.load(std::memory_order_relaxed);
+
+ for (;;) {
+ cell = &Buffer_[pos & Mask_];
+ size_t seq = cell->Sequence.load(std::memory_order_acquire);
+ intptr_t dif = (intptr_t)seq - (intptr_t)pos;
+
+ if (dif == 0) {
+ if (EnqueuePos_.compare_exchange_weak(pos, pos + 1, std::memory_order_relaxed)) {
+ break;
+ }
+ } else if (dif < 0) {
+ return false;
+ } else {
+ pos = EnqueuePos_.load(std::memory_order_relaxed);
+ }
+ }
+
+ static_assert(noexcept(cell->Data = std::forward<T_>(data)));
+ cell->Data = std::forward<T_>(data);
+ cell->Sequence.store(pos + 1, std::memory_order_release);
+
+ return true;
+ }
+
+ [[nodiscard]] bool Dequeue(T& data) noexcept {
+ TCell* cell;
+ size_t pos = DequeuePos_.load(std::memory_order_relaxed);
+
+ for (;;) {
+ cell = &Buffer_[pos & Mask_];
+ size_t seq = cell->Sequence.load(std::memory_order_acquire);
+ intptr_t dif = (intptr_t)seq - (intptr_t)(pos + 1);
+
+ if (dif == 0) {
+ if (DequeuePos_.compare_exchange_weak(pos, pos + 1, std::memory_order_relaxed)) {
+ break;
+ }
+ } else if (dif < 0) {
+ return false;
+ } else {
+ pos = DequeuePos_.load(std::memory_order_relaxed);
+ }
+ }
+
+ static_assert(noexcept(data = std::move(cell->Data)));
+ data = std::move(cell->Data);
+
+ cell->Sequence.store(pos + Mask_ + 1, std::memory_order_release);
+ return true;
+ }
+ private:
+ struct TCell {
+ std::atomic<size_t> Sequence;
+ T Data;
+ };
+
+ std::unique_ptr<TCell[]> Buffer_;
+ const size_t Mask_;
+
+ alignas(64) std::atomic<size_t> EnqueuePos_ = 0;
+ alignas(64) std::atomic<size_t> DequeuePos_ = 0;
+ };
+}
+
diff --git a/library/cpp/threading/bounded_queue/bounded_queue_ut.cpp b/library/cpp/threading/bounded_queue/bounded_queue_ut.cpp
new file mode 100644
index 0000000000..bb5b6eb787
--- /dev/null
+++ b/library/cpp/threading/bounded_queue/bounded_queue_ut.cpp
@@ -0,0 +1,106 @@
+#include "bounded_queue.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+#include <util/thread/factory.h>
+
+using namespace NThreading;
+
+Y_UNIT_TEST_SUITE(TBoundedQueueTest) {
+ Y_UNIT_TEST(QueueSize) {
+ const size_t queueSize = 16;
+ TBoundedQueue<size_t> boundedQueue(queueSize);
+
+ for (size_t i = 0; i < queueSize; ++i) {
+ UNIT_ASSERT(boundedQueue.Enqueue(i));
+ }
+ UNIT_ASSERT(!boundedQueue.Enqueue(0));
+ size_t tmp = 0;
+ UNIT_ASSERT(boundedQueue.Dequeue(tmp));
+ UNIT_ASSERT(boundedQueue.Enqueue(0));
+ UNIT_ASSERT(!boundedQueue.Enqueue(0));
+ }
+
+ Y_UNIT_TEST(Move) {
+ const size_t queueSize = 16;
+ TBoundedQueue<TString> boundedQueue(queueSize);
+
+ for (size_t i = 0; i < queueSize; ++i) {
+ TString v = "xxx";
+ UNIT_ASSERT(boundedQueue.Enqueue(std::move(v)));
+ UNIT_ASSERT(v.empty());
+ }
+
+ {
+ TString v = "xxx";
+ UNIT_ASSERT(!boundedQueue.Enqueue(std::move(v)));
+ UNIT_ASSERT(v == "xxx");
+ }
+
+ TString v;
+ UNIT_ASSERT(boundedQueue.Dequeue(v));
+ UNIT_ASSERT(v == "xxx");
+ }
+
+ Y_UNIT_TEST(MPMC) {
+ size_t queueSize = 16;
+ size_t producers = 10;
+ size_t consumers = 10;
+ size_t itemsCount = 10000;
+
+ TVector<THolder<IThreadFactory::IThread>> threads;
+ TBoundedQueue<std::pair<size_t, size_t>> boundedQueue(queueSize);
+
+ std::atomic<size_t> itemCounter = 0;
+ std::atomic<size_t> consumed = 0;
+
+ for (size_t i = 0; i < consumers; ++i) {
+ threads.push_back(SystemThreadFactory()->Run(
+ [&]() {
+ TVector<size_t> prevItems(producers);
+ for (;;) {
+ std::pair<size_t, size_t> item;
+ while (!boundedQueue.Dequeue(item)) {
+ ;
+ }
+
+ if (item.first >= producers) {
+ break;
+ }
+
+ UNIT_ASSERT(item.second > prevItems[item.first]);
+ prevItems[item.first] = item.second;
+ ++consumed;
+ }
+ })
+ );
+ }
+
+ for (size_t i = 0; i < producers ; ++i) {
+ threads.push_back(SystemThreadFactory()->Run(
+ [&, producerNum = i]() {
+ for (;;) {
+ size_t item = ++itemCounter;
+ if (item > itemsCount) {
+ break;
+ }
+
+ while (!boundedQueue.Enqueue(std::make_pair(producerNum, item))) {
+ ;
+ }
+ }
+
+ while (!boundedQueue.Enqueue(std::make_pair(producers, size_t(0)))) {
+ ;
+ }
+ })
+ );
+ }
+
+
+ for (auto& t : threads) {
+ t->Join();
+ }
+
+ UNIT_ASSERT_VALUES_EQUAL(consumed.load(), itemsCount);
+ }
+}
diff --git a/library/cpp/threading/bounded_queue/ut/ya.make b/library/cpp/threading/bounded_queue/ut/ya.make
new file mode 100644
index 0000000000..5db0a22713
--- /dev/null
+++ b/library/cpp/threading/bounded_queue/ut/ya.make
@@ -0,0 +1,8 @@
+UNITTEST_FOR(library/cpp/threading/bounded_queue)
+
+SRCS(
+ bounded_queue_ut.cpp
+)
+
+END()
+
diff --git a/library/cpp/threading/bounded_queue/ya.make b/library/cpp/threading/bounded_queue/ya.make
new file mode 100644
index 0000000000..4c5336f031
--- /dev/null
+++ b/library/cpp/threading/bounded_queue/ya.make
@@ -0,0 +1,9 @@
+LIBRARY()
+
+SRCS(
+ bounded_queue.h
+)
+
+END()
+
+RECURSE_FOR_TESTS(ut)