summaryrefslogtreecommitdiffstats
path: root/library/cpp/yt
diff options
context:
space:
mode:
authorbabenko <[email protected]>2026-06-06 23:52:00 +0300
committerbabenko <[email protected]>2026-06-07 00:16:49 +0300
commitf10c7206fb31af8057446bceef9707aabaa9456e (patch)
treee3283bfe824beda100b4a598006fdbc2fe221725 /library/cpp/yt
parent51122ce7f1b07aa13c32b9977a8aad5759854b15 (diff)
Cache process/thread id getters and use them in TError origin capture
## Motivation

Profiling the YT master Automaton thread showed TOriginAttributes::Capture (run on every non-OK TError) spending ~60% of its time in a getpid() syscall — uncached on glibc >= 2.25. NYT::GetCurrentThreadId() (gettid) feeds hot thread-affinity / log-manager checks on the same thread.

## Changes

- New library/cpp/yt/system/process_id.* with cached GetProcessId(); GetSystemThreadId() now caches the kernel tid in TLS. Both caches reset in the child after fork.
- Moved thread_name.{h,cpp} from misc to system.
- Removed GetCurrentProcessId/GetCurrentThreadId shims from yt/yt/core/misc/proc.{h,cpp}; migrated all callers to NYT::GetProcessId / NYT::GetSystemThreadId.
- TOriginAttributes::Capture uses the cached getters; recorded Tid is now the real kernel tid (matches perf/ps).
- Added microbenchmarks (library/cpp/yt/system/benchmarks, yt/yt/core/benchmarks/error.cpp).

## Microbenchmarks (release)

| | before | after |
|---|---|---|
| getpid | 101 ns | 0.33 ns |
| gettid | 102 ns | 1.64 ns |
| Capture | 161 ns | 50 ns |
| failed TError | 221 ns | 74 ns |

commit_hash:ee37ae57d61a5a2dd33daee935270f4bb93b7ff9
Diffstat (limited to 'library/cpp/yt')
-rw-r--r--library/cpp/yt/error/origin_attributes.cpp9
-rw-r--r--library/cpp/yt/error/origin_attributes.h3
-rw-r--r--library/cpp/yt/error/ya.make1
-rw-r--r--library/cpp/yt/logging/logger.cpp2
-rw-r--r--library/cpp/yt/logging/logger.h2
-rw-r--r--library/cpp/yt/logging/ya.make1
-rw-r--r--library/cpp/yt/misc/ya.make1
-rw-r--r--library/cpp/yt/system/benchmarks/process.cpp35
-rw-r--r--library/cpp/yt/system/benchmarks/thread.cpp55
-rw-r--r--library/cpp/yt/system/benchmarks/ya.make12
-rw-r--r--library/cpp/yt/system/process_id-inl.h35
-rw-r--r--library/cpp/yt/system/process_id.cpp37
-rw-r--r--library/cpp/yt/system/process_id.h22
-rw-r--r--library/cpp/yt/system/thread_id-inl.h19
-rw-r--r--library/cpp/yt/system/thread_id.cpp28
-rw-r--r--library/cpp/yt/system/thread_id.h3
-rw-r--r--library/cpp/yt/system/thread_name.cpp (renamed from library/cpp/yt/misc/thread_name.cpp)0
-rw-r--r--library/cpp/yt/system/thread_name.h (renamed from library/cpp/yt/misc/thread_name.h)0
-rw-r--r--library/cpp/yt/system/ya.make6
19 files changed, 261 insertions, 10 deletions
diff --git a/library/cpp/yt/error/origin_attributes.cpp b/library/cpp/yt/error/origin_attributes.cpp
index 18582bf3e83..2198912d419 100644
--- a/library/cpp/yt/error/origin_attributes.cpp
+++ b/library/cpp/yt/error/origin_attributes.cpp
@@ -3,12 +3,13 @@
#include <library/cpp/yt/assert/assert.h>
-#include <library/cpp/yt/misc/thread_name.h>
#include <library/cpp/yt/misc/tls.h>
#include <library/cpp/yt/string/format.h>
-#include <util/system/thread.h>
+#include <library/cpp/yt/system/process_id.h>
+#include <library/cpp/yt/system/thread_id.h>
+#include <library/cpp/yt/system/thread_name.h>
namespace NYT {
@@ -65,8 +66,8 @@ void TOriginAttributes::Capture()
}
Datetime = TInstant::Now();
- Pid = GetPID();
- Tid = TThread::CurrentThreadId();
+ Pid = GetProcessId();
+ Tid = GetSystemThreadId();
ThreadName = GetCurrentThreadName();
ExtensionData = NDetail::GetExtensionData();
}
diff --git a/library/cpp/yt/error/origin_attributes.h b/library/cpp/yt/error/origin_attributes.h
index 8804cdf2569..335e2110dcc 100644
--- a/library/cpp/yt/error/origin_attributes.h
+++ b/library/cpp/yt/error/origin_attributes.h
@@ -7,7 +7,8 @@
#include <library/cpp/yt/memory/ref.h>
#include <library/cpp/yt/misc/guid.h>
-#include <library/cpp/yt/misc/thread_name.h>
+
+#include <library/cpp/yt/system/thread_name.h>
#include <library/cpp/yt/threading/public.h>
diff --git a/library/cpp/yt/error/ya.make b/library/cpp/yt/error/ya.make
index 3fe4f88bed7..716af327361 100644
--- a/library/cpp/yt/error/ya.make
+++ b/library/cpp/yt/error/ya.make
@@ -8,6 +8,7 @@ PEERDIR(
library/cpp/yt/global
library/cpp/yt/memory
library/cpp/yt/misc
+ library/cpp/yt/system
library/cpp/yt/threading
library/cpp/yt/string
library/cpp/yt/logging # TODO(arkady-e1ppa): Consider logging error_code crashes to stderr and drop this dep.
diff --git a/library/cpp/yt/logging/logger.cpp b/library/cpp/yt/logging/logger.cpp
index e0af30382d6..458d01d8133 100644
--- a/library/cpp/yt/logging/logger.cpp
+++ b/library/cpp/yt/logging/logger.cpp
@@ -4,7 +4,7 @@
#include <library/cpp/yt/cpu_clock/clock.h>
-#include <library/cpp/yt/misc/thread_name.h>
+#include <library/cpp/yt/system/thread_name.h>
#include <util/system/compiler.h>
#include <util/system/thread.h>
diff --git a/library/cpp/yt/logging/logger.h b/library/cpp/yt/logging/logger.h
index e3e6f58d5a3..4cb6021e53f 100644
--- a/library/cpp/yt/logging/logger.h
+++ b/library/cpp/yt/logging/logger.h
@@ -12,7 +12,7 @@
#include <library/cpp/yt/misc/guid.h>
-#include <library/cpp/yt/misc/thread_name.h>
+#include <library/cpp/yt/system/thread_name.h>
#include <library/cpp/yt/memory/leaky_singleton.h>
diff --git a/library/cpp/yt/logging/ya.make b/library/cpp/yt/logging/ya.make
index e611c2e554c..8b338ed3b77 100644
--- a/library/cpp/yt/logging/ya.make
+++ b/library/cpp/yt/logging/ya.make
@@ -10,6 +10,7 @@ PEERDIR(
library/cpp/yt/assert
library/cpp/yt/memory
library/cpp/yt/misc
+ library/cpp/yt/system
library/cpp/yt/yson_string
)
diff --git a/library/cpp/yt/misc/ya.make b/library/cpp/yt/misc/ya.make
index 3135887e95e..5d2d6d67f33 100644
--- a/library/cpp/yt/misc/ya.make
+++ b/library/cpp/yt/misc/ya.make
@@ -5,7 +5,6 @@ INCLUDE(${ARCADIA_ROOT}/library/cpp/yt/ya_cpp.make.inc)
SRCS(
guid.cpp
source_location.cpp
- thread_name.cpp
)
PEERDIR(
diff --git a/library/cpp/yt/system/benchmarks/process.cpp b/library/cpp/yt/system/benchmarks/process.cpp
new file mode 100644
index 00000000000..74b4b6b1e13
--- /dev/null
+++ b/library/cpp/yt/system/benchmarks/process.cpp
@@ -0,0 +1,35 @@
+#include <benchmark/benchmark.h>
+
+#include <library/cpp/yt/system/process_id.h>
+
+#include <util/system/getpid.h>
+
+namespace NYT {
+namespace {
+
+////////////////////////////////////////////////////////////////////////////////
+
+void BM_GetProcessId(benchmark::State& state)
+{
+ // Times the cached getter: GetProcessId() asks the OS only on a cache miss, so steady-state iterations just read the cached pid.
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(GetProcessId());
+ }
+}
+
+BENCHMARK(BM_GetProcessId);
+
+void BM_RawGetPid(benchmark::State& state)
+{
+ // Baseline for comparison: calls ::GetPID() directly, bypassing the GetProcessId() cache (getpid is an uncached syscall on glibc >= 2.25).
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(::GetPID());
+ }
+}
+
+BENCHMARK(BM_RawGetPid);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace
+} // namespace NYT
diff --git a/library/cpp/yt/system/benchmarks/thread.cpp b/library/cpp/yt/system/benchmarks/thread.cpp
new file mode 100644
index 00000000000..5aa5f7dc2d9
--- /dev/null
+++ b/library/cpp/yt/system/benchmarks/thread.cpp
@@ -0,0 +1,55 @@
+#include <benchmark/benchmark.h>
+
+#include <library/cpp/yt/system/thread_id.h>
+#include <library/cpp/yt/system/thread_name.h>
+
+#include <util/system/thread.h>
+
+namespace NYT {
+namespace {
+
+////////////////////////////////////////////////////////////////////////////////
+
+void BM_GetSystemThreadId(benchmark::State& state)
+{
+ // Times the cached getter: GetSystemThreadId() queries the OS only when the calling thread's cached value is invalid, so steady-state iterations just read the cache.
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(GetSystemThreadId());
+ }
+}
+
+BENCHMARK(BM_GetSystemThreadId);
+
+void BM_GetSequentialThreadId(benchmark::State& state)
+{ // Times one GetSequentialThreadId() call per iteration.
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(GetSequentialThreadId()); // DoNotOptimize keeps the otherwise-unused result from being optimized away.
+ }
+}
+
+BENCHMARK(BM_GetSequentialThreadId);
+
+void BM_RawGetTid(benchmark::State& state)
+{
+ // Baseline for comparison: calls ::TThread::CurrentThreadNumericId() directly (the same call GetSystemThreadIdImpl() makes), bypassing the TLS cache.
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(::TThread::CurrentThreadNumericId());
+ }
+}
+
+BENCHMARK(BM_RawGetTid);
+
+void BM_GetCurrentThreadName(benchmark::State& state)
+{
+ // Times GetCurrentThreadName(), which TOriginAttributes::Capture also calls; presumably TLS-cached (its implementation is not shown in this diff).
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(GetCurrentThreadName());
+ }
+}
+
+BENCHMARK(BM_GetCurrentThreadName);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace
+} // namespace NYT
diff --git a/library/cpp/yt/system/benchmarks/ya.make b/library/cpp/yt/system/benchmarks/ya.make
new file mode 100644
index 00000000000..874d8df323d
--- /dev/null
+++ b/library/cpp/yt/system/benchmarks/ya.make
@@ -0,0 +1,12 @@
+G_BENCHMARK()
+
+SRCS(
+ process.cpp
+ thread.cpp
+)
+
+PEERDIR(
+ library/cpp/yt/system
+)
+
+END()
diff --git a/library/cpp/yt/system/process_id-inl.h b/library/cpp/yt/system/process_id-inl.h
new file mode 100644
index 00000000000..9f0ef98100e
--- /dev/null
+++ b/library/cpp/yt/system/process_id-inl.h
@@ -0,0 +1,35 @@
+#ifndef PROCESS_ID_INL_H_
+#error "Direct inclusion of this file is not allowed, include process_id.h"
+// For the sake of sane code completion.
+#include "process_id.h"
+#endif
+
+#include <atomic>
+
+#include <util/system/compiler.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace NDetail {
+
+TProcessId GetProcessIdImpl();
+
+} // namespace NDetail
+
+extern std::atomic<TProcessId> CachedProcessId;
+
+inline TProcessId GetProcessId()
+{
+ auto cachedProcessId = CachedProcessId.load(std::memory_order::relaxed); // Fast path: a single relaxed load.
+ if (cachedProcessId == InvalidProcessId) [[unlikely]] { // Cache empty: nothing stored yet, or reset by the atfork child handler.
+ cachedProcessId = NDetail::GetProcessIdImpl();
+ CachedProcessId.store(cachedProcessId, std::memory_order::relaxed); // Plain store, no CAS: racing callers within one process would store the same pid.
+ }
+ return cachedProcessId;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/library/cpp/yt/system/process_id.cpp b/library/cpp/yt/system/process_id.cpp
new file mode 100644
index 00000000000..0be6dfd2913
--- /dev/null
+++ b/library/cpp/yt/system/process_id.cpp
@@ -0,0 +1,37 @@
+#include "process_id.h"
+
+#ifdef _unix_
+#include <library/cpp/yt/misc/static_initializer.h>
+
+#include <pthread.h>
+#endif
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+std::atomic<TProcessId> CachedProcessId = InvalidProcessId;
+
+namespace NDetail {
+
+TProcessId GetProcessIdImpl()
+{
+ return ::GetPID(); // Uncached query; GetProcessId() calls this only on a cache miss.
+}
+
+} // namespace NDetail
+
+#ifdef _unix_
+// The pid is stable for the lifetime of a process, so |GetProcessId| caches it to
+// avoid the |getpid| syscall on each call. A |fork| child runs with a fresh pid, so
+// this child-side atfork handler resets the cache; the next call in the child refills it.
+YT_STATIC_INITIALIZER(
+ ::pthread_atfork( // NOTE(review): the returned error code does not appear to be inspected here — confirm that is intended.
+ /*prepare*/ nullptr,
+ /*parent*/ nullptr,
+ /*child*/ [] { CachedProcessId.store(InvalidProcessId, std::memory_order::relaxed); }));
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/library/cpp/yt/system/process_id.h b/library/cpp/yt/system/process_id.h
new file mode 100644
index 00000000000..4dcc89b13ab
--- /dev/null
+++ b/library/cpp/yt/system/process_id.h
@@ -0,0 +1,22 @@
+#pragma once
+
+#include <util/system/getpid.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+constexpr TProcessId InvalidProcessId = TProcessId(-1);
+
+//! Returns the OS process id (|getpid|).
+//! The value is cached process-wide, so only the first call hits the kernel.
+//! The cache is reset in the child after |fork|.
+TProcessId GetProcessId();
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
+
+#define PROCESS_ID_INL_H_
+#include "process_id-inl.h"
+#undef PROCESS_ID_INL_H_
diff --git a/library/cpp/yt/system/thread_id-inl.h b/library/cpp/yt/system/thread_id-inl.h
index 916d70436ed..6139ff863c2 100644
--- a/library/cpp/yt/system/thread_id-inl.h
+++ b/library/cpp/yt/system/thread_id-inl.h
@@ -14,6 +14,25 @@ namespace NYT {
////////////////////////////////////////////////////////////////////////////////
+namespace NDetail {
+
+TSystemThreadId GetSystemThreadIdImpl();
+
+} // namespace NDetail
+
+YT_DECLARE_THREAD_LOCAL(TSystemThreadId, CachedSystemThreadId);
+
+inline TSystemThreadId GetSystemThreadId()
+{
+ auto& cachedSystemThreadId = CachedSystemThreadId(); // Taken by reference so the assignment below updates the thread-local cache in place.
+ if (Y_UNLIKELY(cachedSystemThreadId == InvalidSystemThreadId)) { // Cache empty: first call on this thread, or reset by the atfork child handler.
+ cachedSystemThreadId = NDetail::GetSystemThreadIdImpl();
+ }
+ return cachedSystemThreadId;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
YT_DECLARE_THREAD_LOCAL(TSequentialThreadId, CachedSequentialThreadId);
extern std::atomic<TSequentialThreadId> SequentialThreadIdGenerator;
diff --git a/library/cpp/yt/system/thread_id.cpp b/library/cpp/yt/system/thread_id.cpp
index a1971dd0dd8..978c735df25 100644
--- a/library/cpp/yt/system/thread_id.cpp
+++ b/library/cpp/yt/system/thread_id.cpp
@@ -2,19 +2,43 @@
#include <util/system/thread.h>
+#ifdef _unix_
+#include <library/cpp/yt/misc/static_initializer.h>
+
+#include <pthread.h>
+#endif
+
namespace NYT {
////////////////////////////////////////////////////////////////////////////////
+YT_DEFINE_THREAD_LOCAL(TSystemThreadId, CachedSystemThreadId, InvalidSystemThreadId);
+
YT_DEFINE_THREAD_LOCAL(TSequentialThreadId, CachedSequentialThreadId, InvalidSequentialThreadId);
std::atomic<TSequentialThreadId> SequentialThreadIdGenerator = InvalidSequentialThreadId;
-TSystemThreadId GetSystemThreadId()
+namespace NDetail {
+
+TSystemThreadId GetSystemThreadIdImpl()
{
static_assert(std::is_same_v<TSystemThreadId, ::TThread::TId>);
return ::TThread::CurrentThreadNumericId();
}
+} // namespace NDetail
+
+#ifdef _unix_
+// The kernel tid is stable for the lifetime of a thread, so |GetSystemThreadId|
+// caches it in TLS to avoid the |gettid| syscall on each call. After a |fork|, the
+// surviving (calling) thread of the child gets a fresh kernel tid, so this
+// child-side atfork handler resets that thread's cached value.
+YT_STATIC_INITIALIZER(
+ ::pthread_atfork(
+ /*prepare*/ nullptr,
+ /*parent*/ nullptr,
+ /*child*/ [] { CachedSystemThreadId() = InvalidSystemThreadId; }));
+#endif
+
////////////////////////////////////////////////////////////////////////////////
-} // namespace NYT::NThreading
+} // namespace NYT
diff --git a/library/cpp/yt/system/thread_id.h b/library/cpp/yt/system/thread_id.h
index 1f75067015d..75960d036e1 100644
--- a/library/cpp/yt/system/thread_id.h
+++ b/library/cpp/yt/system/thread_id.h
@@ -10,6 +10,9 @@ namespace NYT {
using TSystemThreadId = size_t;
constexpr TSystemThreadId InvalidSystemThreadId = Max<TSystemThreadId>();
+//! Returns the OS thread id (e.g. |gettid| on Linux).
+//! The value is cached in TLS, so only the first call per thread hits the kernel.
+//! The cache is reset in the child after |fork|.
TSystemThreadId GetSystemThreadId();
using TSequentialThreadId = ui32;
diff --git a/library/cpp/yt/misc/thread_name.cpp b/library/cpp/yt/system/thread_name.cpp
index 11e20abcf5c..11e20abcf5c 100644
--- a/library/cpp/yt/misc/thread_name.cpp
+++ b/library/cpp/yt/system/thread_name.cpp
diff --git a/library/cpp/yt/misc/thread_name.h b/library/cpp/yt/system/thread_name.h
index 35153880831..35153880831 100644
--- a/library/cpp/yt/misc/thread_name.h
+++ b/library/cpp/yt/system/thread_name.h
diff --git a/library/cpp/yt/system/ya.make b/library/cpp/yt/system/ya.make
index 3f767f78f49..72e12e8a348 100644
--- a/library/cpp/yt/system/ya.make
+++ b/library/cpp/yt/system/ya.make
@@ -5,7 +5,9 @@ INCLUDE(${ARCADIA_ROOT}/library/cpp/yt/ya_cpp.make.inc)
SRCS(
env.cpp
exit.cpp
+ process_id.cpp
thread_id.cpp
+ thread_name.cpp
)
PEERDIR(
@@ -15,6 +17,10 @@ PEERDIR(
END()
+RECURSE(
+ benchmarks
+)
+
RECURSE_FOR_TESTS(
unittests
)