diff options
| author | babenko <[email protected]> | 2026-06-06 23:52:00 +0300 |
|---|---|---|
| committer | babenko <[email protected]> | 2026-06-07 00:16:49 +0300 |
| commit | f10c7206fb31af8057446bceef9707aabaa9456e (patch) | |
| tree | e3283bfe824beda100b4a598006fdbc2fe221725 /library/cpp/yt | |
| parent | 51122ce7f1b07aa13c32b9977a8aad5759854b15 (diff) | |
Cache process/thread id getters and use them in TError origin capture
## Motivation
Profiling the YT master Automaton thread showed that TOriginAttributes::Capture (run on every non-OK TError) spends ~60% of its time in the getpid() syscall, which glibc no longer caches as of version 2.25. NYT::GetCurrentThreadId() (a gettid() syscall) likewise feeds hot thread-affinity and log-manager checks on the same thread.
## Changes
- New library/cpp/yt/system/process_id.* with cached GetProcessId(); GetSystemThreadId() now caches the kernel tid in TLS. Both caches are reset in the child after fork (via pthread_atfork child handlers).
- Moved thread_name.{h,cpp} from misc to system.
- Removed GetCurrentProcessId/GetCurrentThreadId shims from yt/yt/core/misc/proc.{h,cpp}; migrated all callers to NYT::GetProcessId / NYT::GetSystemThreadId.
- TOriginAttributes::Capture uses the cached getters; recorded Tid is now the real kernel tid (matches perf/ps).
- Added microbenchmarks (library/cpp/yt/system/benchmarks, yt/yt/core/benchmarks/error.cpp).
## Microbenchmarks (release)
| benchmark | before | after |
|---|---|---|
| getpid | 101 ns | 0.33 ns |
| gettid | 102 ns | 1.64 ns |
| Capture | 161 ns | 50 ns |
| failed TError | 221 ns | 74 ns |
commit_hash:ee37ae57d61a5a2dd33daee935270f4bb93b7ff9
Diffstat (limited to 'library/cpp/yt')
19 files changed, 261 insertions, 10 deletions
diff --git a/library/cpp/yt/error/origin_attributes.cpp b/library/cpp/yt/error/origin_attributes.cpp index 18582bf3e83..2198912d419 100644 --- a/library/cpp/yt/error/origin_attributes.cpp +++ b/library/cpp/yt/error/origin_attributes.cpp @@ -3,12 +3,13 @@ #include <library/cpp/yt/assert/assert.h> -#include <library/cpp/yt/misc/thread_name.h> #include <library/cpp/yt/misc/tls.h> #include <library/cpp/yt/string/format.h> -#include <util/system/thread.h> +#include <library/cpp/yt/system/process_id.h> +#include <library/cpp/yt/system/thread_id.h> +#include <library/cpp/yt/system/thread_name.h> namespace NYT { @@ -65,8 +66,8 @@ void TOriginAttributes::Capture() } Datetime = TInstant::Now(); - Pid = GetPID(); - Tid = TThread::CurrentThreadId(); + Pid = GetProcessId(); + Tid = GetSystemThreadId(); ThreadName = GetCurrentThreadName(); ExtensionData = NDetail::GetExtensionData(); } diff --git a/library/cpp/yt/error/origin_attributes.h b/library/cpp/yt/error/origin_attributes.h index 8804cdf2569..335e2110dcc 100644 --- a/library/cpp/yt/error/origin_attributes.h +++ b/library/cpp/yt/error/origin_attributes.h @@ -7,7 +7,8 @@ #include <library/cpp/yt/memory/ref.h> #include <library/cpp/yt/misc/guid.h> -#include <library/cpp/yt/misc/thread_name.h> + +#include <library/cpp/yt/system/thread_name.h> #include <library/cpp/yt/threading/public.h> diff --git a/library/cpp/yt/error/ya.make b/library/cpp/yt/error/ya.make index 3fe4f88bed7..716af327361 100644 --- a/library/cpp/yt/error/ya.make +++ b/library/cpp/yt/error/ya.make @@ -8,6 +8,7 @@ PEERDIR( library/cpp/yt/global library/cpp/yt/memory library/cpp/yt/misc + library/cpp/yt/system library/cpp/yt/threading library/cpp/yt/string library/cpp/yt/logging # TODO(arkady-e1ppa): Consider logging error_code crashes to stderr and drop this dep. 
diff --git a/library/cpp/yt/logging/logger.cpp b/library/cpp/yt/logging/logger.cpp index e0af30382d6..458d01d8133 100644 --- a/library/cpp/yt/logging/logger.cpp +++ b/library/cpp/yt/logging/logger.cpp @@ -4,7 +4,7 @@ #include <library/cpp/yt/cpu_clock/clock.h> -#include <library/cpp/yt/misc/thread_name.h> +#include <library/cpp/yt/system/thread_name.h> #include <util/system/compiler.h> #include <util/system/thread.h> diff --git a/library/cpp/yt/logging/logger.h b/library/cpp/yt/logging/logger.h index e3e6f58d5a3..4cb6021e53f 100644 --- a/library/cpp/yt/logging/logger.h +++ b/library/cpp/yt/logging/logger.h @@ -12,7 +12,7 @@ #include <library/cpp/yt/misc/guid.h> -#include <library/cpp/yt/misc/thread_name.h> +#include <library/cpp/yt/system/thread_name.h> #include <library/cpp/yt/memory/leaky_singleton.h> diff --git a/library/cpp/yt/logging/ya.make b/library/cpp/yt/logging/ya.make index e611c2e554c..8b338ed3b77 100644 --- a/library/cpp/yt/logging/ya.make +++ b/library/cpp/yt/logging/ya.make @@ -10,6 +10,7 @@ PEERDIR( library/cpp/yt/assert library/cpp/yt/memory library/cpp/yt/misc + library/cpp/yt/system library/cpp/yt/yson_string ) diff --git a/library/cpp/yt/misc/ya.make b/library/cpp/yt/misc/ya.make index 3135887e95e..5d2d6d67f33 100644 --- a/library/cpp/yt/misc/ya.make +++ b/library/cpp/yt/misc/ya.make @@ -5,7 +5,6 @@ INCLUDE(${ARCADIA_ROOT}/library/cpp/yt/ya_cpp.make.inc) SRCS( guid.cpp source_location.cpp - thread_name.cpp ) PEERDIR( diff --git a/library/cpp/yt/system/benchmarks/process.cpp b/library/cpp/yt/system/benchmarks/process.cpp new file mode 100644 index 00000000000..74b4b6b1e13 --- /dev/null +++ b/library/cpp/yt/system/benchmarks/process.cpp @@ -0,0 +1,35 @@ +#include <benchmark/benchmark.h> + +#include <library/cpp/yt/system/process_id.h> + +#include <util/system/getpid.h> + +namespace NYT { +namespace { + +//////////////////////////////////////////////////////////////////////////////// + +void BM_GetProcessId(benchmark::State& state) +{ + // Cached 
getpid: only the first call hits the kernel. + for (auto _ : state) { + benchmark::DoNotOptimize(GetProcessId()); + } +} + +BENCHMARK(BM_GetProcessId); + +void BM_RawGetPid(benchmark::State& state) +{ + // Uncached getpid syscall (uncached on glibc >= 2.25), for comparison. + for (auto _ : state) { + benchmark::DoNotOptimize(::GetPID()); + } +} + +BENCHMARK(BM_RawGetPid); + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace +} // namespace NYT diff --git a/library/cpp/yt/system/benchmarks/thread.cpp b/library/cpp/yt/system/benchmarks/thread.cpp new file mode 100644 index 00000000000..5aa5f7dc2d9 --- /dev/null +++ b/library/cpp/yt/system/benchmarks/thread.cpp @@ -0,0 +1,55 @@ +#include <benchmark/benchmark.h> + +#include <library/cpp/yt/system/thread_id.h> +#include <library/cpp/yt/system/thread_name.h> + +#include <util/system/thread.h> + +namespace NYT { +namespace { + +//////////////////////////////////////////////////////////////////////////////// + +void BM_GetSystemThreadId(benchmark::State& state) +{ + // Cached gettid: only the first call per thread hits the kernel. + for (auto _ : state) { + benchmark::DoNotOptimize(GetSystemThreadId()); + } +} + +BENCHMARK(BM_GetSystemThreadId); + +void BM_GetSequentialThreadId(benchmark::State& state) +{ + for (auto _ : state) { + benchmark::DoNotOptimize(GetSequentialThreadId()); + } +} + +BENCHMARK(BM_GetSequentialThreadId); + +void BM_RawGetTid(benchmark::State& state) +{ + // Uncached gettid syscall, for comparison. + for (auto _ : state) { + benchmark::DoNotOptimize(::TThread::CurrentThreadNumericId()); + } +} + +BENCHMARK(BM_RawGetTid); + +void BM_GetCurrentThreadName(benchmark::State& state) +{ + // TLS-cached thread name (also read by TOriginAttributes::Capture). 
+ for (auto _ : state) { + benchmark::DoNotOptimize(GetCurrentThreadName()); + } +} + +BENCHMARK(BM_GetCurrentThreadName); + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace +} // namespace NYT diff --git a/library/cpp/yt/system/benchmarks/ya.make b/library/cpp/yt/system/benchmarks/ya.make new file mode 100644 index 00000000000..874d8df323d --- /dev/null +++ b/library/cpp/yt/system/benchmarks/ya.make @@ -0,0 +1,12 @@ +G_BENCHMARK() + +SRCS( + process.cpp + thread.cpp +) + +PEERDIR( + library/cpp/yt/system +) + +END() diff --git a/library/cpp/yt/system/process_id-inl.h b/library/cpp/yt/system/process_id-inl.h new file mode 100644 index 00000000000..9f0ef98100e --- /dev/null +++ b/library/cpp/yt/system/process_id-inl.h @@ -0,0 +1,35 @@ +#ifndef PROCESS_ID_INL_H_ +#error "Direct inclusion of this file is not allowed, include process_id.h" +// For the sake of sane code completion. +#include "process_id.h" +#endif + +#include <atomic> + +#include <util/system/compiler.h> + +namespace NYT { + +//////////////////////////////////////////////////////////////////////////////// + +namespace NDetail { + +TProcessId GetProcessIdImpl(); + +} // namespace NDetail + +extern std::atomic<TProcessId> CachedProcessId; + +inline TProcessId GetProcessId() +{ + auto cachedProcessId = CachedProcessId.load(std::memory_order::relaxed); + if (cachedProcessId == InvalidProcessId) [[unlikely]] { + cachedProcessId = NDetail::GetProcessIdImpl(); + CachedProcessId.store(cachedProcessId, std::memory_order::relaxed); + } + return cachedProcessId; +} + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT diff --git a/library/cpp/yt/system/process_id.cpp b/library/cpp/yt/system/process_id.cpp new file mode 100644 index 00000000000..0be6dfd2913 --- /dev/null +++ b/library/cpp/yt/system/process_id.cpp @@ -0,0 +1,37 @@ +#include "process_id.h" + +#ifdef _unix_ +#include 
<library/cpp/yt/misc/static_initializer.h> + +#include <pthread.h> +#endif + +namespace NYT { + +//////////////////////////////////////////////////////////////////////////////// + +std::atomic<TProcessId> CachedProcessId = InvalidProcessId; + +namespace NDetail { + +TProcessId GetProcessIdImpl() +{ + return ::GetPID(); +} + +} // namespace NDetail + +#ifdef _unix_ +// The pid is stable for the lifetime of a process, so we cache it to avoid the +// |getpid| syscall on each call. After a |fork|, however, the child runs with a +// fresh pid, so the cache must be invalidated there. +YT_STATIC_INITIALIZER( + ::pthread_atfork( + /*prepare*/ nullptr, + /*parent*/ nullptr, + /*child*/ [] { CachedProcessId.store(InvalidProcessId, std::memory_order::relaxed); })); +#endif + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT diff --git a/library/cpp/yt/system/process_id.h b/library/cpp/yt/system/process_id.h new file mode 100644 index 00000000000..4dcc89b13ab --- /dev/null +++ b/library/cpp/yt/system/process_id.h @@ -0,0 +1,22 @@ +#pragma once + +#include <util/system/getpid.h> + +namespace NYT { + +//////////////////////////////////////////////////////////////////////////////// + +constexpr TProcessId InvalidProcessId = TProcessId(-1); + +//! Returns the OS process id (|getpid|). +//! The value is cached process-wide, so only the first call hits the kernel. +//! The cache is reset in the child after |fork|. 
+TProcessId GetProcessId(); + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT + +#define PROCESS_ID_INL_H_ +#include "process_id-inl.h" +#undef PROCESS_ID_INL_H_ diff --git a/library/cpp/yt/system/thread_id-inl.h b/library/cpp/yt/system/thread_id-inl.h index 916d70436ed..6139ff863c2 100644 --- a/library/cpp/yt/system/thread_id-inl.h +++ b/library/cpp/yt/system/thread_id-inl.h @@ -14,6 +14,25 @@ namespace NYT { //////////////////////////////////////////////////////////////////////////////// +namespace NDetail { + +TSystemThreadId GetSystemThreadIdImpl(); + +} // namespace NDetail + +YT_DECLARE_THREAD_LOCAL(TSystemThreadId, CachedSystemThreadId); + +inline TSystemThreadId GetSystemThreadId() +{ + auto& cachedSystemThreadId = CachedSystemThreadId(); + if (Y_UNLIKELY(cachedSystemThreadId == InvalidSystemThreadId)) { + cachedSystemThreadId = NDetail::GetSystemThreadIdImpl(); + } + return cachedSystemThreadId; +} + +//////////////////////////////////////////////////////////////////////////////// + YT_DECLARE_THREAD_LOCAL(TSequentialThreadId, CachedSequentialThreadId); extern std::atomic<TSequentialThreadId> SequentialThreadIdGenerator; diff --git a/library/cpp/yt/system/thread_id.cpp b/library/cpp/yt/system/thread_id.cpp index a1971dd0dd8..978c735df25 100644 --- a/library/cpp/yt/system/thread_id.cpp +++ b/library/cpp/yt/system/thread_id.cpp @@ -2,19 +2,43 @@ #include <util/system/thread.h> +#ifdef _unix_ +#include <library/cpp/yt/misc/static_initializer.h> + +#include <pthread.h> +#endif + namespace NYT { //////////////////////////////////////////////////////////////////////////////// +YT_DEFINE_THREAD_LOCAL(TSystemThreadId, CachedSystemThreadId, InvalidSystemThreadId); + YT_DEFINE_THREAD_LOCAL(TSequentialThreadId, CachedSequentialThreadId, InvalidSequentialThreadId); std::atomic<TSequentialThreadId> SequentialThreadIdGenerator = InvalidSequentialThreadId; -TSystemThreadId GetSystemThreadId() +namespace NDetail { + 
+TSystemThreadId GetSystemThreadIdImpl() { static_assert(std::is_same_v<TSystemThreadId, ::TThread::TId>); return ::TThread::CurrentThreadNumericId(); } +} // namespace NDetail + +#ifdef _unix_ +// The kernel tid is stable for the lifetime of a thread, so we cache it to +// avoid the |gettid| syscall on each call. After a |fork|, however, the +// surviving (calling) thread of the child gets a fresh kernel tid, so the +// cache must be invalidated there. +YT_STATIC_INITIALIZER( + ::pthread_atfork( + /*prepare*/ nullptr, + /*parent*/ nullptr, + /*child*/ [] { CachedSystemThreadId() = InvalidSystemThreadId; })); +#endif + //////////////////////////////////////////////////////////////////////////////// -} // namespace NYT::NThreading +} // namespace NYT diff --git a/library/cpp/yt/system/thread_id.h b/library/cpp/yt/system/thread_id.h index 1f75067015d..75960d036e1 100644 --- a/library/cpp/yt/system/thread_id.h +++ b/library/cpp/yt/system/thread_id.h @@ -10,6 +10,9 @@ namespace NYT { using TSystemThreadId = size_t; constexpr TSystemThreadId InvalidSystemThreadId = Max<TSystemThreadId>(); +//! Returns the OS thread id (e.g. |gettid| on Linux). +//! The value is cached in TLS, so only the first call per thread hits the kernel. +//! The cache is reset in the child after |fork|. 
TSystemThreadId GetSystemThreadId(); using TSequentialThreadId = ui32; diff --git a/library/cpp/yt/misc/thread_name.cpp b/library/cpp/yt/system/thread_name.cpp index 11e20abcf5c..11e20abcf5c 100644 --- a/library/cpp/yt/misc/thread_name.cpp +++ b/library/cpp/yt/system/thread_name.cpp diff --git a/library/cpp/yt/misc/thread_name.h b/library/cpp/yt/system/thread_name.h index 35153880831..35153880831 100644 --- a/library/cpp/yt/misc/thread_name.h +++ b/library/cpp/yt/system/thread_name.h diff --git a/library/cpp/yt/system/ya.make b/library/cpp/yt/system/ya.make index 3f767f78f49..72e12e8a348 100644 --- a/library/cpp/yt/system/ya.make +++ b/library/cpp/yt/system/ya.make @@ -5,7 +5,9 @@ INCLUDE(${ARCADIA_ROOT}/library/cpp/yt/ya_cpp.make.inc) SRCS( env.cpp exit.cpp + process_id.cpp thread_id.cpp + thread_name.cpp ) PEERDIR( @@ -15,6 +17,10 @@ PEERDIR( END() +RECURSE( + benchmarks +) + RECURSE_FOR_TESTS( unittests ) |
