diff options
| author | babenko <[email protected]> | 2026-06-14 18:52:20 +0300 |
|---|---|---|
| committer | babenko <[email protected]> | 2026-06-14 19:17:43 +0300 |
| commit | 52f00b11c4259dfa810ecdcba576a801dd043a87 (patch) | |
| tree | e5d7550d8387b03d9a862a2a4a76968da0e33853 /library/cpp | |
| parent | e68d615f34b4fd7d9b7309efa15366c6136d5bf9 (diff) | |
Fix rseq fast path on glibc < 2.35: read the shared __rseq_abi area
The own-area approach (registering a separate rseq area of our own) did not deliver
the fast path on glibc 2.31 (YT's current runtime). There, tcmalloc registers the
conventional `__rseq_abi` area for every thread; our attempt to register a separate
area was rejected by the kernel with EINVAL (a thread may have only one rseq area),
so `cpu_id` stayed -1 and every `GetCurrentCpuId()` fell back to `sched_getcpu()`
(~17-20 ns, slower than the rdtscp it replaced).
Read the shared `__rseq_abi` symbol instead -- the area tcmalloc, librseq and
pre-2.35 glibc all register. Our definition is weak, so it coalesces with theirs
when present (the common case -- tcmalloc owns it) and stands alone otherwise
(e.g. musl), in which case we register it ourselves. We register with the
conventional signature `0x53053053` and size 32, i.e. the same parameters the other
registrants use, so re-registering an already-registered area returns EBUSY (treated
as success) rather than EINVAL (address or size mismatch) or EPERM (signature
mismatch) -- coexisting cleanly with tcmalloc.
glibc >= 2.35 still takes the `__rseq_offset` path unchanged.
Measured on sas2-2769 (glibc 2.31 + tcmalloc): `GetCurrentCpuId()` went from 20.0 ns
to 0.60 ns. Verified via strace that our registration now returns EBUSY against
tcmalloc's `__rseq_abi` (previously EINVAL against a separate area).
commit_hash:509809deeb5f7c671817fcd9ebcc8499eabf096e
Diffstat (limited to 'library/cpp')
| -rw-r--r-- | library/cpp/yt/rseq/rseq.cpp | 71 |
1 files changed, 43 insertions, 28 deletions
diff --git a/library/cpp/yt/rseq/rseq.cpp b/library/cpp/yt/rseq/rseq.cpp index 4ac739cfca9..9ebcb4f45ef 100644 --- a/library/cpp/yt/rseq/rseq.cpp +++ b/library/cpp/yt/rseq/rseq.cpp @@ -10,6 +10,7 @@ #include <sys/syscall.h> #include <unistd.h> +#include <cerrno> #include <cstddef> #include <utility> @@ -33,6 +34,11 @@ namespace { // offset 4 and is the only field we read. constexpr unsigned RseqRegistrationSize = 32; +// The conventional rseq signature shared by glibc, librseq and tcmalloc. We must pass +// the same one so that re-registering an already-registered area yields EBUSY (success) +// rather than EINVAL; see RegisterCurrentThread. +constexpr unsigned RseqSignature = 0x53053053; + struct alignas(32) TRseqArea { ui32 CpuIdStart; @@ -52,38 +58,50 @@ extern const std::ptrdiff_t __rseq_offset __attribute__((weak)); extern const unsigned int __rseq_size __attribute__((weak)); } // extern "C" -// Our own per-thread rseq area, used when glibc does not own the registration. -// initial-exec so its offset from the thread pointer is a link-time constant, and -// CpuId starts at -1 so an unregistered thread takes the slow path. -__thread TRseqArea OwnRseqArea __attribute__((tls_model("initial-exec"), aligned(32))) = { +// The legacy per-thread rseq area. tcmalloc, librseq and pre-2.35 glibc all define and +// register this exact symbol; our definition is weak, so it coalesces with theirs when +// present (the common case in YT binaries -- tcmalloc owns it) and stands alone, with us +// registering it, otherwise (e.g. musl). initial-exec gives a link-time-constant offset +// from the thread pointer; CpuId starts at -1 so an unregistered thread takes the slow +// path. +extern "C" { +__thread TRseqArea __rseq_abi __attribute__((weak, tls_model("initial-exec"), aligned(32))) = { .CpuId = static_cast<ui32>(-1), }; +} // extern "C" -// True iff we (not glibc) own the registration and the kernel supports rseq. 
+// True when we read __rseq_abi (not the glibc-owned area) and so must make sure each +// thread is registered. bool OwnsRegistration = false; bool RegisterCurrentThread() { - // flags = 0, signature = 0: we never use restartable critical sections, so the - // signature is irrelevant (it is only checked at an rseq_cs abort handler). - return ::syscall(RseqSyscallNumber, &OwnRseqArea, RseqRegistrationSize, 0u, 0u) == 0; + // flags = 0. We pass the shared signature and the standard size so that whoever of + // {us, tcmalloc, librseq} runs first registers __rseq_abi and the rest get EBUSY, + // which is success for our read-only use (we never run rseq critical sections, so the + // signature only ever matters for matching this registration call). + if (::syscall(RseqSyscallNumber, &__rseq_abi, RseqRegistrationSize, 0u, RseqSignature) == 0) { + return true; + } + return errno == EBUSY; } YT_PREVENT_TLS_CACHING std::ptrdiff_t ComputeCpuIdFieldOffset() { if (&__rseq_size != nullptr && __rseq_size != 0) { - // glibc owns the registration and keeps every thread's cpu_id up to date. + // glibc owns the registration and keeps every thread's cpu_id up to date in its + // own area; just read it. return __rseq_offset + static_cast<std::ptrdiff_t>(offsetof(TRseqArea, CpuId)); } - // We own the registration. Probe kernel support by registering this (main) thread; - // other threads register lazily on their first slow-path call. Point at our area - // either way: cpu_id holds the real value once registered and stays -1 (routing to - // the slow path) when it is not. - if (RegisterCurrentThread()) { - OwnsRegistration = true; - } + // Otherwise use __rseq_abi. Register this (main) thread; other threads register + // lazily on their first slow-path call. The offset points at __rseq_abi either way: + // cpu_id holds the real value once the thread is registered and stays -1 (routing to + // the slow path) until then. 
+ OwnsRegistration = true; + RegisterCurrentThread(); auto* threadPointer = static_cast<char*>(__builtin_thread_pointer()); - return (reinterpret_cast<char*>(&OwnRseqArea) - threadPointer) + static_cast<std::ptrdiff_t>(offsetof(TRseqArea, CpuId)); + return (reinterpret_cast<char*>(&__rseq_abi) - threadPointer) + + static_cast<std::ptrdiff_t>(offsetof(TRseqArea, CpuId)); } YT_STATIC_INITIALIZER({ @@ -94,18 +112,15 @@ YT_STATIC_INITIALIZER({ YT_PREVENT_TLS_CACHING bool EnsureCurrentThreadRegistered() { - if (!OwnsRegistration) { - // Either glibc owns the registration (every thread is already registered) or - // rseq is unavailable. The two are told apart by what cpu_id reads: a valid - // (>= 0) value means registered. - return ReadField<int>(CpuIdFieldOffset) >= 0; - } - - // We own the registration: register this thread once, on first use. - thread_local bool RegistrationAttempted = false; - if (!std::exchange(RegistrationAttempted, true)) { - RegisterCurrentThread(); + if (OwnsRegistration) { + // Register this thread once, on first use. Usually a no-op (EBUSY): in YT + // binaries tcmalloc registers __rseq_abi for every thread before we get here. + thread_local bool RegistrationAttempted = false; + if (!std::exchange(RegistrationAttempted, true)) { + RegisterCurrentThread(); + } } + // Either way the thread is registered iff cpu_id reads as valid (>= 0). return ReadField<int>(CpuIdFieldOffset) >= 0; } |
