diff options
author | agri <agri@yandex-team.ru> | 2022-02-10 16:48:12 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:48:12 +0300 |
commit | 2909866fbc652492b7d7cab3023cb19489dc4fd8 (patch) | |
tree | b222e5ac2e2e98872661c51ccceee5da0d291e13 /library/cpp/actors/memory_log/memlog.cpp | |
parent | d3530b2692e400bd4d29bd4f07cafaee139164e7 (diff) | |
download | ydb-2909866fbc652492b7d7cab3023cb19489dc4fd8.tar.gz |
Restoring authorship annotation for <agri@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/actors/memory_log/memlog.cpp')
-rw-r--r-- | library/cpp/actors/memory_log/memlog.cpp | 514 |
1 files changed, 257 insertions, 257 deletions
diff --git a/library/cpp/actors/memory_log/memlog.cpp b/library/cpp/actors/memory_log/memlog.cpp index f20162db70..8e6b46727d 100644 --- a/library/cpp/actors/memory_log/memlog.cpp +++ b/library/cpp/actors/memory_log/memlog.cpp @@ -1,28 +1,28 @@ -#include "memlog.h" - +#include "memlog.h" + #include <library/cpp/actors/util/datetime.h> -#include <util/system/info.h> -#include <util/system/atomic.h> -#include <util/system/align.h> - -#include <contrib/libs/linuxvdso/interface.h> - -#if (defined(_i386_) || defined(_x86_64_)) && defined(_linux_) -#define HAVE_VDSO_GETCPU 1 -#include <contrib/libs/linuxvdso/interface.h> -static int (*FastGetCpu)(unsigned* cpu, unsigned* node, void* unused); -#endif - -#if defined(_unix_) +#include <util/system/info.h> +#include <util/system/atomic.h> +#include <util/system/align.h> + +#include <contrib/libs/linuxvdso/interface.h> + +#if (defined(_i386_) || defined(_x86_64_)) && defined(_linux_) +#define HAVE_VDSO_GETCPU 1 +#include <contrib/libs/linuxvdso/interface.h> +static int (*FastGetCpu)(unsigned* cpu, unsigned* node, void* unused); +#endif + +#if defined(_unix_) #include <sched.h> -#elif defined(_win_) +#elif defined(_win_) #include <WinBase.h> -#else +#else #error NO IMPLEMENTATION FOR THE PLATFORM -#endif - -const char TMemoryLog::DEFAULT_LAST_MARK[16] = { +#endif + +const char TMemoryLog::DEFAULT_LAST_MARK[16] = { 'c', 'b', '7', @@ -39,9 +39,9 @@ const char TMemoryLog::DEFAULT_LAST_MARK[16] = { '4', '5', '\n', -}; - -const char TMemoryLog::CLEAR_MARK[16] = { +}; + +const char TMemoryLog::CLEAR_MARK[16] = { ' ', ' ', ' ', @@ -58,146 +58,146 @@ const char TMemoryLog::CLEAR_MARK[16] = { ' ', ' ', '\n', -}; - -unsigned TMemoryLog::GetSelfCpu() noexcept { -#if defined(_unix_) +}; + +unsigned TMemoryLog::GetSelfCpu() noexcept { +#if defined(_unix_) #if HAVE_VDSO_GETCPU - unsigned cpu; - if (Y_LIKELY(FastGetCpu != nullptr)) { - auto result = FastGetCpu(&cpu, nullptr, nullptr); - Y_VERIFY(result == 0); + unsigned cpu; + if (Y_LIKELY(FastGetCpu != nullptr)) { + auto result = FastGetCpu(&cpu, nullptr, nullptr); + Y_VERIFY(result == 0); return cpu; - } else { - return 0; - } - + } else { + return 0; + } + #elif defined(_x86_64_) || defined(_i386_) - + #define CPUID(func, eax, ebx, ecx, edx) \ __asm__ __volatile__( \ "cpuid" \ : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) \ : "a"(func)); - - int a = 0, b = 0, c = 0, d = 0; - CPUID(0x1, a, b, c, d); - int acpiID = (b >> 24); - return acpiID; - + + int a = 0, b = 0, c = 0, d = 0; + CPUID(0x1, a, b, c, d); + int acpiID = (b >> 24); + return acpiID; + #elif defined(__CNUC__) - return sched_getcpu(); + return sched_getcpu(); #else - return 0; + return 0; #endif - -#elif defined(_win_) - return GetCurrentProcessorNumber(); -#else - return 0; -#endif -} - -TMemoryLog* TMemoryLog::MemLogBuffer = nullptr; + +#elif defined(_win_) + return GetCurrentProcessorNumber(); +#else + return 0; +#endif +} + +TMemoryLog* TMemoryLog::MemLogBuffer = nullptr; Y_POD_THREAD(TThread::TId) TMemoryLog::LogThreadId; -char* TMemoryLog::LastMarkIsHere = nullptr; - -std::atomic<bool> TMemoryLog::PrintLastMark(true); - +char* TMemoryLog::LastMarkIsHere = nullptr; + +std::atomic<bool> TMemoryLog::PrintLastMark(true); + TMemoryLog::TMemoryLog(size_t totalSize, size_t grainSize) : GrainSize(grainSize) , FreeGrains(DEFAULT_TOTAL_SIZE / DEFAULT_GRAIN_SIZE * 2) , Buf(totalSize) -{ - Y_VERIFY(DEFAULT_TOTAL_SIZE % DEFAULT_GRAIN_SIZE == 0); - NumberOfGrains = DEFAULT_TOTAL_SIZE / DEFAULT_GRAIN_SIZE; - - for (size_t i = 0; i < NumberOfGrains; ++i) { - new (GetGrain(i)) TGrain; - } - - NumberOfCpus = NSystemInfo::NumberOfCpus(); - Y_VERIFY(NumberOfGrains > NumberOfCpus); - ActiveGrains.Reset(new TGrain*[NumberOfCpus]); - for (size_t i = 0; i < NumberOfCpus; ++i) { - ActiveGrains[i] = GetGrain(i); - } - - for (size_t i = NumberOfCpus; i < NumberOfGrains; ++i) { - FreeGrains.StubbornPush(GetGrain(i)); - } - -#if HAVE_VDSO_GETCPU - auto vdsoFunc = (decltype(FastGetCpu)) - NVdso::Function("__vdso_getcpu", "LINUX_2.6"); - AtomicSet(FastGetCpu, vdsoFunc); -#endif -} - -void* TMemoryLog::GetWriteBuffer(size_t amount) noexcept { - // alignment required by NoCacheMemcpy - amount = AlignUp<size_t>(amount, MemcpyAlignment); - - for (ui16 tries = MAX_GET_BUFFER_TRIES; tries-- > 0;) { - auto myCpu = GetSelfCpu(); - - TGrain* grain = AtomicGet(ActiveGrains[myCpu]); - - if (grain != nullptr) { - auto mine = AtomicGetAndAdd(grain->WritePointer, amount); - if (mine + amount <= GrainSize - sizeof(TGrain)) { - return &grain->Data[mine]; - } - - if (!AtomicCas(&ActiveGrains[myCpu], 0, grain)) { - continue; - } - - FreeGrains.StubbornPush(grain); - } - - grain = (TGrain*)FreeGrains.Pop(); - - if (grain == nullptr) { - return nullptr; - } - - grain->WritePointer = 0; - - if (!AtomicCas(&ActiveGrains[myCpu], grain, 0)) { - FreeGrains.StubbornPush(grain); - continue; - } - } - - return nullptr; -} - -void ClearAlignedTail(char* tail) noexcept { - auto aligned = AlignUp(tail, TMemoryLog::MemcpyAlignment); - if (aligned > tail) { - memset(tail, 0, aligned - tail); - } -} - -#if defined(_x86_64_) || defined(_i386_) -#include <xmmintrin.h> -// the main motivation is not poluting CPU cache -NO_SANITIZE_THREAD -void NoCacheMemcpy(char* dst, const char* src, size_t size) noexcept { - while (size >= sizeof(__m128) * 2) { - __m128 a = _mm_load_ps((float*)(src + 0 * sizeof(__m128))); - __m128 b = _mm_load_ps((float*)(src + 1 * sizeof(__m128))); - _mm_stream_ps((float*)(dst + 0 * sizeof(__m128)), a); - _mm_stream_ps((float*)(dst + 1 * sizeof(__m128)), b); - - size -= sizeof(__m128) * 2; - src += sizeof(__m128) * 2; - dst += sizeof(__m128) * 2; - } - memcpy(dst, src, size); -} +{ + Y_VERIFY(DEFAULT_TOTAL_SIZE % DEFAULT_GRAIN_SIZE == 0); + NumberOfGrains = DEFAULT_TOTAL_SIZE / DEFAULT_GRAIN_SIZE; + + for (size_t i = 0; i < NumberOfGrains; ++i) { + new (GetGrain(i)) TGrain; + } + + NumberOfCpus = NSystemInfo::NumberOfCpus(); + Y_VERIFY(NumberOfGrains > NumberOfCpus); + ActiveGrains.Reset(new TGrain*[NumberOfCpus]); + for (size_t i = 0; i < NumberOfCpus; ++i) { + ActiveGrains[i] = GetGrain(i); + } + + for (size_t i = NumberOfCpus; i < NumberOfGrains; ++i) { + FreeGrains.StubbornPush(GetGrain(i)); + } + +#if HAVE_VDSO_GETCPU + auto vdsoFunc = (decltype(FastGetCpu)) + NVdso::Function("__vdso_getcpu", "LINUX_2.6"); + AtomicSet(FastGetCpu, vdsoFunc); +#endif +} + +void* TMemoryLog::GetWriteBuffer(size_t amount) noexcept { + // alignment required by NoCacheMemcpy + amount = AlignUp<size_t>(amount, MemcpyAlignment); + + for (ui16 tries = MAX_GET_BUFFER_TRIES; tries-- > 0;) { + auto myCpu = GetSelfCpu(); + + TGrain* grain = AtomicGet(ActiveGrains[myCpu]); + + if (grain != nullptr) { + auto mine = AtomicGetAndAdd(grain->WritePointer, amount); + if (mine + amount <= GrainSize - sizeof(TGrain)) { + return &grain->Data[mine]; + } + + if (!AtomicCas(&ActiveGrains[myCpu], 0, grain)) { + continue; + } + + FreeGrains.StubbornPush(grain); + } + + grain = (TGrain*)FreeGrains.Pop(); + + if (grain == nullptr) { + return nullptr; + } + + grain->WritePointer = 0; + + if (!AtomicCas(&ActiveGrains[myCpu], grain, 0)) { + FreeGrains.StubbornPush(grain); + continue; + } + } + + return nullptr; +} + +void ClearAlignedTail(char* tail) noexcept { + auto aligned = AlignUp(tail, TMemoryLog::MemcpyAlignment); + if (aligned > tail) { + memset(tail, 0, aligned - tail); + } +} + +#if defined(_x86_64_) || defined(_i386_) +#include <xmmintrin.h> +// the main motivation is not poluting CPU cache +NO_SANITIZE_THREAD +void NoCacheMemcpy(char* dst, const char* src, size_t size) noexcept { + while (size >= sizeof(__m128) * 2) { + __m128 a = _mm_load_ps((float*)(src + 0 * sizeof(__m128))); + __m128 b = _mm_load_ps((float*)(src + 1 * sizeof(__m128))); + _mm_stream_ps((float*)(dst + 0 * sizeof(__m128)), a); + _mm_stream_ps((float*)(dst + 1 * sizeof(__m128)), b); + + size -= sizeof(__m128) * 2; + src += sizeof(__m128) * 2; + dst += sizeof(__m128) * 2; + } + memcpy(dst, src, size); +} NO_SANITIZE_THREAD void NoWCacheMemcpy(char* dst, const char* src, size_t size) noexcept { @@ -224,144 +224,144 @@ void NoWCacheMemcpy(char* dst, const char* src, size_t size) noexcept { } } -#endif - -NO_SANITIZE_THREAD -char* BareMemLogWrite(const char* begin, size_t msgSize, bool isLast) noexcept { - bool lastMark = - isLast && TMemoryLog::PrintLastMark.load(std::memory_order_acquire); - size_t amount = lastMark ? msgSize + TMemoryLog::LAST_MARK_SIZE : msgSize; - - char* buffer = (char*)TMemoryLog::GetWriteBufferStatic(amount); - if (buffer == nullptr) { - return nullptr; - } - -#if defined(_x86_64_) || defined(_i386_) - if (AlignDown(begin, TMemoryLog::MemcpyAlignment) == begin) { - NoCacheMemcpy(buffer, begin, msgSize); +#endif + +NO_SANITIZE_THREAD +char* BareMemLogWrite(const char* begin, size_t msgSize, bool isLast) noexcept { + bool lastMark = + isLast && TMemoryLog::PrintLastMark.load(std::memory_order_acquire); + size_t amount = lastMark ? msgSize + TMemoryLog::LAST_MARK_SIZE : msgSize; + + char* buffer = (char*)TMemoryLog::GetWriteBufferStatic(amount); + if (buffer == nullptr) { + return nullptr; + } + +#if defined(_x86_64_) || defined(_i386_) + if (AlignDown(begin, TMemoryLog::MemcpyAlignment) == begin) { + NoCacheMemcpy(buffer, begin, msgSize); } else { NoWCacheMemcpy(buffer, begin, msgSize); } #else memcpy(buffer, begin, msgSize); #endif - - if (lastMark) { - TMemoryLog::ChangeLastMark(buffer + msgSize); - } - - ClearAlignedTail(buffer + amount); - return buffer; -} - -NO_SANITIZE_THREAD -bool MemLogWrite(const char* begin, size_t msgSize, bool addLF) noexcept { - bool lastMark = TMemoryLog::PrintLastMark.load(std::memory_order_acquire); - size_t amount = lastMark ? msgSize + TMemoryLog::LAST_MARK_SIZE : msgSize; - - // Let's construct prolog with timestamp and thread id - auto threadId = TMemoryLog::GetTheadId(); - - // alignment required by NoCacheMemcpy - // check for format for snprintf - constexpr size_t prologSize = 48; + + if (lastMark) { + TMemoryLog::ChangeLastMark(buffer + msgSize); + } + + ClearAlignedTail(buffer + amount); + return buffer; +} + +NO_SANITIZE_THREAD +bool MemLogWrite(const char* begin, size_t msgSize, bool addLF) noexcept { + bool lastMark = TMemoryLog::PrintLastMark.load(std::memory_order_acquire); + size_t amount = lastMark ? msgSize + TMemoryLog::LAST_MARK_SIZE : msgSize; + + // Let's construct prolog with timestamp and thread id + auto threadId = TMemoryLog::GetTheadId(); + + // alignment required by NoCacheMemcpy + // check for format for snprintf + constexpr size_t prologSize = 48; alignas(TMemoryLog::MemcpyAlignment) char prolog[prologSize + 1]; Y_VERIFY(AlignDown(&prolog, TMemoryLog::MemcpyAlignment) == &prolog); - - int snprintfResult = snprintf(prolog, prologSize + 1, + + int snprintfResult = snprintf(prolog, prologSize + 1, "TS %020" PRIu64 " TI %020" PRIu64 " ", GetCycleCountFast(), threadId); - - if (snprintfResult < 0) { - return false; - } - Y_VERIFY(snprintfResult == prologSize); - - amount += prologSize; - if (addLF) { - ++amount; // add 1 byte for \n at the end of the message - } - - char* buffer = (char*)TMemoryLog::GetWriteBufferStatic(amount); - if (buffer == nullptr) { - return false; - } - -#if defined(_x86_64_) || defined(_i386_) + + if (snprintfResult < 0) { + return false; + } + Y_VERIFY(snprintfResult == prologSize); + + amount += prologSize; + if (addLF) { + ++amount; // add 1 byte for \n at the end of the message + } + + char* buffer = (char*)TMemoryLog::GetWriteBufferStatic(amount); + if (buffer == nullptr) { + return false; + } + +#if defined(_x86_64_) || defined(_i386_) // warning: copy prolog first to avoid corruption of the message // by prolog tail NoCacheMemcpy(buffer, prolog, prologSize); if (AlignDown(begin + prologSize, TMemoryLog::MemcpyAlignment) == begin + prologSize) { NoCacheMemcpy(buffer + prologSize, begin, msgSize); - } else { + } else { NoWCacheMemcpy(buffer + prologSize, begin, msgSize); } #else memcpy(buffer, prolog, prologSize); memcpy(buffer + prologSize, begin, msgSize); #endif - - if (addLF) { - buffer[prologSize + msgSize] = '\n'; - } - - if (lastMark) { - TMemoryLog::ChangeLastMark(buffer + prologSize + msgSize + (int)addLF); - } - - ClearAlignedTail(buffer + amount); - return true; -} - -NO_SANITIZE_THREAD -void TMemoryLog::ChangeLastMark(char* buffer) noexcept { - memcpy(buffer, DEFAULT_LAST_MARK, LAST_MARK_SIZE); - auto oldMark = AtomicSwap(&LastMarkIsHere, buffer); - if (Y_LIKELY(oldMark != nullptr)) { - memcpy(oldMark, CLEAR_MARK, LAST_MARK_SIZE); - } - if (AtomicGet(LastMarkIsHere) != buffer) { - memcpy(buffer, CLEAR_MARK, LAST_MARK_SIZE); - AtomicBarrier(); - } -} - -bool MemLogVPrintF(const char* format, va_list params) noexcept { - auto logger = TMemoryLog::GetMemoryLogger(); - if (logger == nullptr) { - return false; - } - - auto threadId = TMemoryLog::GetTheadId(); - - // alignment required by NoCacheMemcpy + + if (addLF) { + buffer[prologSize + msgSize] = '\n'; + } + + if (lastMark) { + TMemoryLog::ChangeLastMark(buffer + prologSize + msgSize + (int)addLF); + } + + ClearAlignedTail(buffer + amount); + return true; +} + +NO_SANITIZE_THREAD +void TMemoryLog::ChangeLastMark(char* buffer) noexcept { + memcpy(buffer, DEFAULT_LAST_MARK, LAST_MARK_SIZE); + auto oldMark = AtomicSwap(&LastMarkIsHere, buffer); + if (Y_LIKELY(oldMark != nullptr)) { + memcpy(oldMark, CLEAR_MARK, LAST_MARK_SIZE); + } + if (AtomicGet(LastMarkIsHere) != buffer) { + memcpy(buffer, CLEAR_MARK, LAST_MARK_SIZE); + AtomicBarrier(); + } +} + +bool MemLogVPrintF(const char* format, va_list params) noexcept { + auto logger = TMemoryLog::GetMemoryLogger(); + if (logger == nullptr) { + return false; + } + + auto threadId = TMemoryLog::GetTheadId(); + + // alignment required by NoCacheMemcpy alignas(TMemoryLog::MemcpyAlignment) char buf[TMemoryLog::MAX_MESSAGE_SIZE]; Y_VERIFY(AlignDown(&buf, TMemoryLog::MemcpyAlignment) == &buf); - + int prologSize = snprintf(buf, TMemoryLog::MAX_MESSAGE_SIZE - 2, "TS %020" PRIu64 " TI %020" PRIu64 " ", GetCycleCountFast(), threadId); - - if (Y_UNLIKELY(prologSize < 0)) { - return false; - } - Y_VERIFY((ui32)prologSize <= TMemoryLog::MAX_MESSAGE_SIZE); - - int add = vsnprintf( + + if (Y_UNLIKELY(prologSize < 0)) { + return false; + } + Y_VERIFY((ui32)prologSize <= TMemoryLog::MAX_MESSAGE_SIZE); + + int add = vsnprintf( &buf[prologSize], - TMemoryLog::MAX_MESSAGE_SIZE - prologSize - 2, - format, params); - - if (Y_UNLIKELY(add < 0)) { - return false; - } - Y_VERIFY(add >= 0); - auto totalSize = prologSize + add; - + TMemoryLog::MAX_MESSAGE_SIZE - prologSize - 2, + format, params); + + if (Y_UNLIKELY(add < 0)) { + return false; + } + Y_VERIFY(add >= 0); + auto totalSize = prologSize + add; + buf[totalSize++] = '\n'; - Y_VERIFY((ui32)totalSize <= TMemoryLog::MAX_MESSAGE_SIZE); - + Y_VERIFY((ui32)totalSize <= TMemoryLog::MAX_MESSAGE_SIZE); + return BareMemLogWrite(buf, totalSize) != nullptr; -} +} |