diff options
author | agri <agri@yandex-team.ru> | 2022-02-10 16:48:12 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:48:12 +0300 |
commit | d3530b2692e400bd4d29bd4f07cafaee139164e7 (patch) | |
tree | b7ae636a74490e649a2ed0fdd5361f1bec83b9f9 /library/cpp | |
parent | 0f4c5d1e8c0672bf0a1f2f2d8acac5ba24772435 (diff) | |
download | ydb-d3530b2692e400bd4d29bd4f07cafaee139164e7.tar.gz |
Restoring authorship annotation for <agri@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp')
83 files changed, 5689 insertions, 5689 deletions
diff --git a/library/cpp/actors/core/actor_bootstrapped.h b/library/cpp/actors/core/actor_bootstrapped.h index a37887c939..e15bb86ce6 100644 --- a/library/cpp/actors/core/actor_bootstrapped.h +++ b/library/cpp/actors/core/actor_bootstrapped.h @@ -28,8 +28,8 @@ namespace NActors { } else { static_assert(dependent_false<TDerived>::value, "No correct Bootstrap() signature"); } - } - + } + TActorBootstrapped() : TActor<TDerived>(&TDerived::StateBootstrap) {} diff --git a/library/cpp/actors/core/actorsystem.h b/library/cpp/actors/core/actorsystem.h index 40499d7586..58d360edcc 100644 --- a/library/cpp/actors/core/actorsystem.h +++ b/library/cpp/actors/core/actorsystem.h @@ -129,7 +129,7 @@ namespace NActors { virtual void SetRealTimeMode() const {} }; - + // could be proxy to in-pool schedulers (for NUMA-aware executors) class ISchedulerThread : TNonCopyable { public: @@ -352,7 +352,7 @@ namespace NActors { NLog::TSettings* LoggerSettings() const { return LoggerSettings0.Get(); } - + void GetPoolStats(ui32 poolId, TExecutorPoolStats& poolStats, TVector<TExecutorThreadStats>& statsCopy) const; void DeferPreStop(std::function<void()> fn) { @@ -360,8 +360,8 @@ namespace NActors { } /* This is the base for memory profiling tags. - System sets memory profiling tag for debug version of lfalloc. - The tag is set as "base_tag + actor_activity_type". */ + System sets memory profiling tag for debug version of lfalloc. + The tag is set as "base_tag + actor_activity_type". 
*/ static ui32 MemProfActivityBase; }; } diff --git a/library/cpp/actors/core/event.cpp b/library/cpp/actors/core/event.cpp index 33f8ce2aaf..1c05ffc3fe 100644 --- a/library/cpp/actors/core/event.cpp +++ b/library/cpp/actors/core/event.cpp @@ -1,7 +1,7 @@ #include "event.h" -#include "event_pb.h" - -namespace NActors { +#include "event_pb.h" + +namespace NActors { const TScopeId TScopeId::LocallyGenerated{ Max<ui64>(), Max<ui64>() @@ -22,8 +22,8 @@ namespace NActors { return chainBuf; } return new TEventSerializedData; - } - + } + TIntrusivePtr<TEventSerializedData> IEventHandle::GetChainBuffer() { if (Buffer) return Buffer; @@ -34,5 +34,5 @@ namespace NActors { return Buffer; } return new TEventSerializedData; - } -} + } +} diff --git a/library/cpp/actors/core/event.h b/library/cpp/actors/core/event.h index 6ff02aaf94..081549071d 100644 --- a/library/cpp/actors/core/event.h +++ b/library/cpp/actors/core/event.h @@ -3,7 +3,7 @@ #include "defs.h" #include "actorid.h" #include "callstack.h" -#include "event_load.h" +#include "event_load.h" #include <library/cpp/actors/wilson/wilson_trace.h> @@ -17,13 +17,13 @@ namespace NActors { public: virtual bool SerializeToArcadiaStream(TChunkSerializer*) const = 0; }; - + class IEventBase : TNonCopyable, public ISerializerToStream { public: // actual typing is performed by IEventHandle - + virtual ~IEventBase() { } @@ -87,7 +87,7 @@ namespace NActors { Buffer.Reset(); return x; } - + enum EFlags { FlagTrackDelivery = 1 << 0, FlagForwardOnNondelivery = 1 << 1, @@ -236,7 +236,7 @@ namespace NActors { , RewriteType(Type) { } - + TIntrusivePtr<TEventSerializedData> GetChainBuffer(); TIntrusivePtr<TEventSerializedData> ReleaseChainBuffer(); @@ -248,15 +248,15 @@ namespace NActors { } else { return 0; } - } + } bool HasBuffer() const { return bool(Buffer); - } + } bool HasEvent() const { return bool(Event); - } + } IEventBase* GetBase() { if (!Event) { @@ -326,7 +326,7 @@ namespace NActors { } \ bool IsSerializable() const override { 
\ return false; \ - } + } #define DEFINE_SIMPLE_NONLOCAL_EVENT(eventType, header) \ TString ToStringHeader() const override { \ @@ -340,5 +340,5 @@ namespace NActors { } \ bool IsSerializable() const override { \ return true; \ - } + } } diff --git a/library/cpp/actors/core/event_load.h b/library/cpp/actors/core/event_load.h index 0dab1dd374..da2adc28ea 100644 --- a/library/cpp/actors/core/event_load.h +++ b/library/cpp/actors/core/event_load.h @@ -1,24 +1,24 @@ -#pragma once +#pragma once #include <util/stream/walk.h> -#include <util/system/types.h> +#include <util/system/types.h> #include <util/generic/string.h> #include <library/cpp/actors/util/rope.h> #include <library/cpp/actors/wilson/wilson_trace.h> - -namespace NActors { + +namespace NActors { class IEventHandle; - + struct TConstIoVec { const void* Data; size_t Size; }; - + struct TIoVec { void* Data; size_t Size; }; - + class TEventSerializedData : public TThrRefBase { @@ -70,7 +70,7 @@ namespace NActors { } return result; } - + TRope EraseBack(size_t count) { Y_VERIFY(count <= Rope.GetSize()); TRope::TIterator iter = Rope.End(); @@ -81,25 +81,25 @@ namespace NActors { void Append(TRope&& from) { Rope.Insert(Rope.End(), std::move(from)); } - + void Append(TString buffer) { if (buffer) { Rope.Insert(Rope.End(), TRope(std::move(buffer))); } } }; -} - +} + class TChainBufWalk : public IWalkInput { TIntrusivePtr<NActors::TEventSerializedData> Buffer; TRope::TConstIterator Iter; - + public: TChainBufWalk(TIntrusivePtr<NActors::TEventSerializedData> buffer) : Buffer(std::move(buffer)) , Iter(Buffer->GetBeginIter()) {} - + private: size_t DoUnboundedNext(const void **ptr) override { const size_t size = Iter.ContiguousSize(); @@ -108,5 +108,5 @@ private: Iter.AdvanceToNextContiguousBlock(); } return size; - } + } }; diff --git a/library/cpp/actors/core/event_local.h b/library/cpp/actors/core/event_local.h index 2845aa94dd..2a4ff9fa55 100644 --- a/library/cpp/actors/core/event_local.h +++ 
b/library/cpp/actors/core/event_local.h @@ -2,7 +2,7 @@ #include "event.h" #include "scheduler_cookie.h" -#include "event_load.h" +#include "event_load.h" #include <util/system/type_name.h> namespace NActors { diff --git a/library/cpp/actors/core/event_pb.cpp b/library/cpp/actors/core/event_pb.cpp index 018ff9ac34..bae0a0a64b 100644 --- a/library/cpp/actors/core/event_pb.cpp +++ b/library/cpp/actors/core/event_pb.cpp @@ -1,6 +1,6 @@ -#include "event_pb.h" - -namespace NActors { +#include "event_pb.h" + +namespace NActors { bool TRopeStream::Next(const void** data, int* size) { *data = Iter.ContiguousData(); *size = Iter.ContiguousSize(); @@ -13,13 +13,13 @@ namespace NActors { TotalByteCount += *size; return *size != 0; } - + void TRopeStream::BackUp(int count) { Y_VERIFY(count <= TotalByteCount); Iter -= count; TotalByteCount -= count; } - + bool TRopeStream::Skip(int count) { if (static_cast<size_t>(TotalByteCount + count) > Size) { count = Size - TotalByteCount; @@ -27,20 +27,20 @@ namespace NActors { Iter += count; TotalByteCount += count; return static_cast<size_t>(TotalByteCount) != Size; - } - + } + TCoroutineChunkSerializer::TCoroutineChunkSerializer() : TotalSerializedDataSize(0) , Stack(64 * 1024) , SelfClosure{this, TArrayRef(Stack.Begin(), Stack.End())} , InnerContext(SelfClosure) {} - + TCoroutineChunkSerializer::~TCoroutineChunkSerializer() { CancelFlag = true; Resume(); Y_VERIFY(Finished); - } + } bool TCoroutineChunkSerializer::AllowsAliasing() const { return true; @@ -85,10 +85,10 @@ namespace NActors { } else { InnerContext.SwitchTo(BufFeedContext); } - } + } return true; - } - + } + bool TCoroutineChunkSerializer::Next(void** data, int* size) { if (CancelFlag || AbortFlag) { return false; @@ -122,15 +122,15 @@ namespace NActors { BufferPtr -= count; SizeRemain += count; TotalSerializedDataSize -= count; - } - + } + void TCoroutineChunkSerializer::Resume() { TContMachineContext feedContext; BufFeedContext = &feedContext; 
feedContext.SwitchTo(&InnerContext); BufFeedContext = nullptr; - } - + } + bool TCoroutineChunkSerializer::WriteRope(const TRope *rope) { for (auto iter = rope->Begin(); iter.Valid(); iter.AdvanceToNextContiguousBlock()) { if (!WriteAliasedRaw(iter.ContiguousData(), iter.ContiguousSize())) { @@ -156,14 +156,14 @@ namespace NActors { return {Chunks, Chunks + NumChunks}; } - + void TCoroutineChunkSerializer::SetSerializingEvent(const IEventBase *event) { Y_VERIFY(Event == nullptr); Event = event; TotalSerializedDataSize = 0; AbortFlag = false; } - + void TCoroutineChunkSerializer::Abort() { Y_VERIFY(Event); AbortFlag = true; @@ -181,8 +181,8 @@ namespace NActors { } Finished = true; InnerContext.SwitchTo(BufFeedContext); - } - + } + bool TAllocChunkSerializer::Next(void** pdata, int* psize) { if (Backup) { // we have some data in backup rope -- move the first chunk from the backup rope to the buffer and return @@ -200,12 +200,12 @@ namespace NActors { Buffers->Append(TRope(std::move(item))); } return true; - } - + } + void TAllocChunkSerializer::BackUp(int count) { Backup.Insert(Backup.Begin(), Buffers->EraseBack(count)); } - + bool TAllocChunkSerializer::WriteAliasedRaw(const void*, int) { Y_VERIFY(false); return false; diff --git a/library/cpp/actors/core/event_pb.h b/library/cpp/actors/core/event_pb.h index d7546b901a..1c69d7e9bf 100644 --- a/library/cpp/actors/core/event_pb.h +++ b/library/cpp/actors/core/event_pb.h @@ -1,15 +1,15 @@ #pragma once #include "event.h" -#include "event_load.h" - +#include "event_load.h" + #include <google/protobuf/io/zero_copy_stream.h> #include <google/protobuf/arena.h> #include <library/cpp/actors/protos/actors.pb.h> -#include <util/generic/deque.h> -#include <util/system/context.h> -#include <util/system/filemap.h> -#include <array> +#include <util/generic/deque.h> +#include <util/system/context.h> +#include <util/system/filemap.h> +#include <array> namespace NActors { @@ -29,11 +29,11 @@ namespace NActors { int64_t ByteCount() 
const override { return TotalByteCount; } - + private: int64_t TotalByteCount = 0; }; - + class TChunkSerializer : public NProtoBuf::io::ZeroCopyOutputStream { public: TChunkSerializer() = default; @@ -42,7 +42,7 @@ namespace NActors { virtual bool WriteRope(const TRope *rope) = 0; virtual bool WriteString(const TString *s) = 0; }; - + class TAllocChunkSerializer final : public TChunkSerializer { public: bool Next(void** data, int* size) override; @@ -51,7 +51,7 @@ namespace NActors { return Buffers->GetSize(); } bool WriteAliasedRaw(const void* data, int size) override; - + // WARNING: these methods require owner to retain ownership and immutability of passed objects bool WriteRope(const TRope *rope) override; bool WriteString(const TString *s) override; @@ -62,19 +62,19 @@ namespace NActors { } return std::move(Buffers); } - + protected: TIntrusivePtr<TEventSerializedData> Buffers = new TEventSerializedData; TRope Backup; }; - + class TCoroutineChunkSerializer final : public TChunkSerializer, protected ITrampoLine { public: using TChunk = std::pair<const char*, size_t>; TCoroutineChunkSerializer(); ~TCoroutineChunkSerializer(); - + void SetSerializingEvent(const IEventBase *event); void Abort(); std::pair<TChunk*, TChunk*> FeedBuf(void* data, size_t size); @@ -87,7 +87,7 @@ namespace NActors { const IEventBase *GetCurrentEvent() const { return Event; } - + bool Next(void** data, int* size) override; void BackUp(int count) override; int64_t ByteCount() const override { @@ -95,7 +95,7 @@ namespace NActors { } bool WriteAliasedRaw(const void* data, int size) override; bool AllowsAliasing() const override; - + bool WriteRope(const TRope *rope) override; bool WriteString(const TString *s) override; @@ -103,7 +103,7 @@ namespace NActors { void DoRun() override; void Resume(); bool Produce(const void *data, size_t size); - + i64 TotalSerializedDataSize; TMappedAllocation Stack; TContClosure SelfClosure; @@ -120,7 +120,7 @@ namespace NActors { bool SerializationSuccess; 
bool Finished = false; }; - + #ifdef ACTORLIB_HUGE_PB_SIZE static const size_t EventMaxByteSize = 140 << 20; // (140MB) #else @@ -137,9 +137,9 @@ namespace NActors { public: using ProtoRecordType = TRecord; - + TEventPBBase() = default; - + explicit TEventPBBase(const TRecord& rec) { Record = rec; @@ -153,7 +153,7 @@ namespace NActors { TString ToStringHeader() const override { return Record.GetTypeName(); } - + TString ToString() const override { return Record.ShortDebugString(); } @@ -274,7 +274,7 @@ namespace NActors { ev->CachedByteSize = input->GetSize(); return ev.Release(); } - + size_t GetCachedByteSize() const { if (CachedByteSize == 0) { CachedByteSize = CalculateSerializedSize(); diff --git a/library/cpp/actors/core/events.h b/library/cpp/actors/core/events.h index 702cf50fad..88103e888c 100644 --- a/library/cpp/actors/core/events.h +++ b/library/cpp/actors/core/events.h @@ -1,11 +1,11 @@ #pragma once #include "event.h" -#include "event_pb.h" +#include "event_pb.h" #include <library/cpp/actors/protos/actors.pb.h> #include <util/system/unaligned_mem.h> - + namespace NActors { struct TEvents { enum EEventSpace { @@ -213,7 +213,7 @@ namespace NActors { using TEvPoisonPill = TEvPoison; // Legacy name, deprecated using TEvActorDied = TEvGone; - }; + }; } template <> diff --git a/library/cpp/actors/core/executelater.h b/library/cpp/actors/core/executelater.h index e7a13c1005..53da592373 100644 --- a/library/cpp/actors/core/executelater.h +++ b/library/cpp/actors/core/executelater.h @@ -1,10 +1,10 @@ -#pragma once - -#include "actor_bootstrapped.h" - -#include <utility> - -namespace NActors { +#pragma once + +#include "actor_bootstrapped.h" + +#include <utility> + +namespace NActors { template <typename TCallback> class TExecuteLater: public TActorBootstrapped<TExecuteLater<TCallback>> { public: @@ -13,10 +13,10 @@ namespace NActors { } TExecuteLater( - TCallback&& callback, - IActor::EActivityType activityType, - ui32 channel = 0, - ui64 cookie = 0, + 
TCallback&& callback, + IActor::EActivityType activityType, + ui32 channel = 0, + ui64 cookie = 0, const TActorId& reportCompletionTo = TActorId(), const TActorId& reportExceptionTo = TActorId()) noexcept : Callback(std::move(callback)) @@ -27,16 +27,16 @@ namespace NActors { { this->SetActivityType(activityType); } - + void Bootstrap(const TActorContext& ctx) noexcept { try { { /* RAII, Callback should be destroyed right before sending - TEvCallbackCompletion */ - + TEvCallbackCompletion */ + auto local = std::move(Callback); using T = decltype(local); - + if constexpr (std::is_invocable_v<T, const TActorContext&>) { local(ctx); } else { @@ -56,11 +56,11 @@ namespace NActors { new TEvents::TEvCallbackException(ctx.SelfID, msg), Channel, Cookie); } - } - + } + this->Die(ctx); - } - + } + private: TCallback Callback; const ui32 Channel; @@ -68,13 +68,13 @@ namespace NActors { const TActorId ReportCompletionTo; const TActorId ReportExceptionTo; }; - + template <typename T> IActor* CreateExecuteLaterActor( - T&& func, - IActor::EActivityType activityType, - ui32 channel = 0, - ui64 cookie = 0, + T&& func, + IActor::EActivityType activityType, + ui32 channel = 0, + ui64 cookie = 0, const TActorId& reportCompletionTo = TActorId(), const TActorId& reportExceptionTo = TActorId()) noexcept { return new TExecuteLater<T>(std::forward<T>(func), @@ -84,4 +84,4 @@ namespace NActors { reportCompletionTo, reportExceptionTo); } -} +} diff --git a/library/cpp/actors/core/executor_pool_basic.cpp b/library/cpp/actors/core/executor_pool_basic.cpp index 4dce16939a..3123e9b1a6 100644 --- a/library/cpp/actors/core/executor_pool_basic.cpp +++ b/library/cpp/actors/core/executor_pool_basic.cpp @@ -4,23 +4,23 @@ #include <library/cpp/actors/util/affinity.h> #include <library/cpp/actors/util/datetime.h> -#ifdef _linux_ +#ifdef _linux_ #include <pthread.h> -#endif - +#endif + namespace NActors { LWTRACE_USING(ACTORLIB_PROVIDER); constexpr TDuration TBasicExecutorPool::DEFAULT_TIME_PER_MAILBOX; 
TBasicExecutorPool::TBasicExecutorPool( - ui32 poolId, - ui32 threads, - ui64 spinThreshold, + ui32 poolId, + ui32 threads, + ui64 spinThreshold, const TString& poolName, TAffinity* affinity, - TDuration timePerMailbox, - ui32 eventsPerMailbox, + TDuration timePerMailbox, + ui32 eventsPerMailbox, int realtimePriority, ui32 maxActivityType) : TExecutorPoolBase(poolId, threads, affinity, maxActivityType) @@ -330,10 +330,10 @@ namespace NActors { if (pthread_setschedparam(threadSelf, SCHED_FIFO, &param)) { Y_FAIL("Cannot set realtime priority"); } - } -#else + } +#else Y_UNUSED(RealtimePriority); -#endif +#endif } ui32 TBasicExecutorPool::GetThreadCount() const { diff --git a/library/cpp/actors/core/executor_pool_basic.h b/library/cpp/actors/core/executor_pool_basic.h index 023190f7fe..65ceed2669 100644 --- a/library/cpp/actors/core/executor_pool_basic.h +++ b/library/cpp/actors/core/executor_pool_basic.h @@ -62,7 +62,7 @@ namespace NActors { TAtomic ThreadUtilization; TAtomic MaxUtilizationCounter; TAtomic MaxUtilizationAccumulator; - + TAtomic ThreadCount; TMutex ChangeThreadsLock; @@ -81,7 +81,7 @@ namespace NActors { ui32 maxActivityType = 1); explicit TBasicExecutorPool(const TBasicExecutorPoolConfig& cfg); ~TBasicExecutorPool(); - + ui32 GetReadyActivation(TWorkerContext& wctx, ui64 revolvingReadCounter) override; void Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) override; diff --git a/library/cpp/actors/core/executor_pool_united.cpp b/library/cpp/actors/core/executor_pool_united.cpp index dac6245635..e5968609e7 100644 --- a/library/cpp/actors/core/executor_pool_united.cpp +++ b/library/cpp/actors/core/executor_pool_united.cpp @@ -14,7 +14,7 @@ #include <util/system/datetime.h> #include <util/system/hp_timer.h> - + #include <algorithm> namespace NActors { @@ -1315,7 +1315,7 @@ namespace NActors { if (Y_UNLIKELY(result == CpuStopped) || TryAcquireToken(result)) { break; // token acquired (or stop) } - } + } 
wctx.AddElapsedCycles(IActor::ACTOR_SYSTEM, timeTracker.Elapsed()); return result; diff --git a/library/cpp/actors/core/executor_pool_united.h b/library/cpp/actors/core/executor_pool_united.h index a090ba2466..01be95b778 100644 --- a/library/cpp/actors/core/executor_pool_united.h +++ b/library/cpp/actors/core/executor_pool_united.h @@ -63,7 +63,7 @@ namespace NActors { // Sets executor for specified pool void SetupPool(TPoolId pool, IExecutorPool* executorPool, TMailboxTable* mailboxTable); - + // Add activation of newly scheduled mailbox and wake cpu to execute it if required void PushActivation(TPoolId pool, ui32 activation, ui64 revolvingCounter); @@ -72,7 +72,7 @@ namespace NActors { // Try to wake idle cpu waiting for tokens on specified pool void TryWake(TPoolId pool); - + // Get activation from pool; requires pool's token void BeginExecution(TPoolId pool, ui32& activation, ui64 revolvingCounter); diff --git a/library/cpp/actors/core/executor_thread.cpp b/library/cpp/actors/core/executor_thread.cpp index 446b651efd..ac97689f31 100644 --- a/library/cpp/actors/core/executor_thread.cpp +++ b/library/cpp/actors/core/executor_thread.cpp @@ -303,7 +303,7 @@ namespace NActors { ExecutorPool->SetRealTimeMode(); TAffinityGuard affinity(ExecutorPool->Affinity()); - + NHPTimer::STime hpnow = GetCycleCountFast(); NHPTimer::STime hpprev = hpnow; ui64 execCount = 0; diff --git a/library/cpp/actors/core/executor_thread.h b/library/cpp/actors/core/executor_thread.h index 9d3c573f0d..66b97bd351 100644 --- a/library/cpp/actors/core/executor_thread.h +++ b/library/cpp/actors/core/executor_thread.h @@ -45,7 +45,7 @@ namespace NActors { void UnregisterActor(TMailboxHeader* mailbox, ui64 localActorId); void DropUnregistered(); const std::vector<THolder<IActor>>& GetUnregistered() const { return DyingActors; } - + void Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie = nullptr); void Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, 
ISchedulerCookie* cookie = nullptr); void Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie = nullptr); diff --git a/library/cpp/actors/core/log.cpp b/library/cpp/actors/core/log.cpp index 5f63b5af58..bfac7d30e4 100644 --- a/library/cpp/actors/core/log.cpp +++ b/library/cpp/actors/core/log.cpp @@ -195,7 +195,7 @@ namespace NActors { , Metrics(std::make_unique<TLoggerMetrics>(metrics)) { } - + TLoggerActor::TLoggerActor(TIntrusivePtr<NLog::TSettings> settings, std::shared_ptr<TLogBackend> logBackend, std::shared_ptr<NMonitoring::TMetricRegistry> metrics) @@ -260,8 +260,8 @@ namespace NActors { break; default: break; - } - + } + } void TLoggerActor::HandleLogEvent(NLog::TEvLog::TPtr& ev, const NActors::TActorContext& ctx) { diff --git a/library/cpp/actors/core/log.h b/library/cpp/actors/core/log.h index c11a7cf3c1..514ff51c14 100644 --- a/library/cpp/actors/core/log.h +++ b/library/cpp/actors/core/log.h @@ -42,7 +42,7 @@ actorCtxOrSystem, priority, component, __VA_ARGS__); \ } \ } while (0) /**/ - + #define LOG_LOG_S_SAMPLED_BY(actorCtxOrSystem, priority, component, sampleBy, stream) \ LOG_LOG_SAMPLED_BY(actorCtxOrSystem, priority, component, sampleBy, "%s", [&]() { \ TStringBuilder logStringBuilder; \ @@ -304,7 +304,7 @@ namespace NActors { ///////////////////////////////////////////////////////////////////// // Logging adaptors for memory log and logging into filesystem ///////////////////////////////////////////////////////////////////// - + namespace NDetail { inline void Y_PRINTF_FORMAT(2, 3) PrintfV(TString& dst, const char* format, ...) { va_list params; @@ -318,7 +318,7 @@ namespace NActors { } } // namespace NDetail - template <typename TCtx> + template <typename TCtx> inline void DeliverLogMessage(TCtx& ctx, NLog::EPriority mPriority, NLog::EComponent mComponent, TString &&str) { const NLog::TSettings *mSettings = ctx.LoggerSettings(); @@ -327,14 +327,14 @@ namespace NActors { } template <typename TCtx, typename... 
TArgs> - inline void MemLogAdapter( + inline void MemLogAdapter( TCtx& actorCtxOrSystem, NLog::EPriority mPriority, NLog::EComponent mComponent, const char* format, TArgs&&... params) { TString Formatted; - - + + if constexpr (sizeof... (params) > 0) { NDetail::PrintfV(Formatted, format, std::forward<TArgs>(params)...); } else { @@ -343,9 +343,9 @@ namespace NActors { MemLogWrite(Formatted.data(), Formatted.size(), true); DeliverLogMessage(actorCtxOrSystem, mPriority, mComponent, std::move(Formatted)); - } - - template <typename TCtx> + } + + template <typename TCtx> Y_WRAPPER inline void MemLogAdapter( TCtx& actorCtxOrSystem, NLog::EPriority mPriority, @@ -355,7 +355,7 @@ namespace NActors { MemLogWrite(str.data(), str.size(), true); DeliverLogMessage(actorCtxOrSystem, mPriority, mComponent, TString(str)); } - + template <typename TCtx> Y_WRAPPER inline void MemLogAdapter( TCtx& actorCtxOrSystem, @@ -365,5 +365,5 @@ namespace NActors { MemLogWrite(str.data(), str.size(), true); DeliverLogMessage(actorCtxOrSystem, mPriority, mComponent, std::move(str)); - } + } } diff --git a/library/cpp/actors/core/mailbox.cpp b/library/cpp/actors/core/mailbox.cpp index d84b4f9e46..ac598eff86 100644 --- a/library/cpp/actors/core/mailbox.cpp +++ b/library/cpp/actors/core/mailbox.cpp @@ -214,49 +214,49 @@ namespace NActors { return true; case TMailboxType::HTSwap: { THTSwapMailbox* const mailbox = THTSwapMailbox::Get(lineHint, x); -#if (!defined(_tsan_enabled_)) +#if (!defined(_tsan_enabled_)) Y_VERIFY_DEBUG(mailbox->Type == (ui32)x->MailboxType); -#endif +#endif mailbox->Queue.Push(ev.Release()); if (mailbox->MarkForSchedule()) { RelaxedStore<NHPTimer::STime>(&mailbox->ScheduleMoment, GetCycleCountFast()); executorPool->ScheduleActivation(hint); } - } + } return true; case TMailboxType::ReadAsFilled: { if (lineHint > TReadAsFilledMailbox::MaxMailboxesInLine()) return false; - + TReadAsFilledMailbox* const mailbox = TReadAsFilledMailbox::Get(lineHint, x); -#if 
(!defined(_tsan_enabled_)) +#if (!defined(_tsan_enabled_)) Y_VERIFY_DEBUG(mailbox->Type == (ui32)x->MailboxType); -#endif +#endif mailbox->Queue.Push(ev.Release()); if (mailbox->MarkForSchedule()) { RelaxedStore<NHPTimer::STime>(&mailbox->ScheduleMoment, GetCycleCountFast()); executorPool->ScheduleActivation(hint); } - } + } return true; case TMailboxType::TinyReadAsFilled: { if (lineHint > TTinyReadAsFilledMailbox::MaxMailboxesInLine()) return false; - + TTinyReadAsFilledMailbox* const mailbox = TTinyReadAsFilledMailbox::Get(lineHint, x); -#if (!defined(_tsan_enabled_)) +#if (!defined(_tsan_enabled_)) Y_VERIFY_DEBUG(mailbox->Type == (ui32)x->MailboxType); -#endif +#endif mailbox->Queue.Push(ev.Release()); if (mailbox->MarkForSchedule()) { RelaxedStore<NHPTimer::STime>(&mailbox->ScheduleMoment, GetCycleCountFast()); executorPool->ScheduleActivation(hint); } - } + } return true; default: Y_FAIL("unknown mailbox type"); - } + } } return false; diff --git a/library/cpp/actors/core/mailbox.h b/library/cpp/actors/core/mailbox.h index 0bd9c4d314..8a2c0d0608 100644 --- a/library/cpp/actors/core/mailbox.h +++ b/library/cpp/actors/core/mailbox.h @@ -10,7 +10,7 @@ #include <library/cpp/threading/queue/mpsc_read_as_filled.h> #include <util/generic/hash.h> #include <util/system/hp_timer.h> -#include <util/generic/ptr.h> +#include <util/generic/ptr.h> // TODO: clean all broken arcadia atomic stuff and replace with intrinsics namespace NActors { @@ -389,52 +389,52 @@ namespace NActors { constexpr static ui32 AlignedSize() { return ((sizeof(TRevolvingMailbox) + 63) / 64) * 64; } - + std::pair<ui32, ui32> CountRevolvingMailboxEvents(ui64 localActorId, ui32 maxTraverse); bool CleanupEvents(); }; - + static_assert(sizeof(TRevolvingMailbox) == 128, "expect sizeof(TRevolvingMailbox) == 128"); - + struct THTSwapMailbox: public TMailboxHeader { using TQueueType = NThreading::THTSwapQueue<IEventHandle*>; - + TQueueType Queue; NHPTimer::STime ScheduleMoment; char Padding_[16]; - + 
THTSwapMailbox() : TMailboxHeader(TMailboxType::HTSwap) , ScheduleMoment(0) { } - + ~THTSwapMailbox() { CleanupEvents(); } - + IEventHandle* Pop() { return Queue.Pop(); } - + IEventHandle* Head() { return Queue.Peek(); } - + static THTSwapMailbox* Get(ui32 hint, void* line) { return (THTSwapMailbox*)((ui8*)line + 64 + (hint - 1) * 64); } - + constexpr static ui64 MaxMailboxesInLine() { return (LineSize - 64) / AlignedSize(); } - + static const TMailboxType::EType MailboxType = TMailboxType::HTSwap; - + constexpr static ui32 AlignedSize() { return ((sizeof(THTSwapMailbox) + 63) / 64) * 64; } - + bool CleanupEvents() { const bool done = (Queue.Peek() == nullptr); while (IEventHandle* ev = Queue.Pop()) @@ -442,50 +442,50 @@ namespace NActors { return done; } }; - + static_assert(sizeof(THTSwapMailbox) == 64, "expect sizeof(THTSwapMailbox) == 64"); - + struct TReadAsFilledMailbox: public TMailboxHeader { using TQueueType = NThreading::TReadAsFilledQueue<IEventHandle>; - + TQueueType Queue; NHPTimer::STime ScheduleMoment; char Padding_[8]; - + TReadAsFilledMailbox() : TMailboxHeader(TMailboxType::ReadAsFilled) , ScheduleMoment(0) { } - + ~TReadAsFilledMailbox() { CleanupEvents(); } - + IEventHandle* Pop() { return Queue.Pop(); } - + IEventHandle* Head() { return Queue.Peek(); } - + static TReadAsFilledMailbox* Get(ui32 hint, void* line) { return (TReadAsFilledMailbox*)((ui8*)line + 64 + (hint - 1) * 192); } - + constexpr static ui64 MaxMailboxesInLine() { return (LineSize - 64) / AlignedSize(); } - + static const TMailboxType::EType MailboxType = TMailboxType::ReadAsFilled; - + constexpr static ui32 AlignedSize() { return ((sizeof(TReadAsFilledMailbox) + 63) / 64) * 64; } - + bool CleanupEvents() { const bool done = (Queue.Peek() == nullptr); while (IEventHandle* ev = Queue.Pop()) @@ -493,52 +493,52 @@ namespace NActors { return done; } }; - + static_assert(sizeof(TReadAsFilledMailbox) == 192, "expect sizeof(TReadAsFilledMailbox) == 192"); - + struct 
TTinyReadAsFilledMailbox: public TMailboxHeader { using TQueueType = NThreading::TReadAsFilledQueue< IEventHandle, NThreading::TRaFQueueBunchSize<4>>; - + TQueueType Queue; NHPTimer::STime ScheduleMoment; char Padding_[8]; - + TTinyReadAsFilledMailbox() : TMailboxHeader(TMailboxType::TinyReadAsFilled) , ScheduleMoment(0) { } - + ~TTinyReadAsFilledMailbox() { CleanupEvents(); } - + IEventHandle* Pop() { return Queue.Pop(); } - + IEventHandle* Head() { return Queue.Peek(); } - + static TTinyReadAsFilledMailbox* Get(ui32 hint, void* line) { return (TTinyReadAsFilledMailbox*)((ui8*)line + 64 + (hint - 1) * 192); } - + constexpr static ui64 MaxMailboxesInLine() { return (LineSize - 64) / AlignedSize(); } - + static const TMailboxType::EType MailboxType = TMailboxType::TinyReadAsFilled; - + constexpr static ui32 AlignedSize() { return ((sizeof(TTinyReadAsFilledMailbox) + 63) / 64) * 64; } - + bool CleanupEvents() { const bool done = (Queue.Peek() == nullptr); while (IEventHandle* ev = Queue.Pop()) @@ -546,8 +546,8 @@ namespace NActors { return done; } }; - + static_assert(sizeof(TTinyReadAsFilledMailbox) == 192, "expect sizeof(TTinyReadAsFilledMailbox) == 192"); - }; + }; } diff --git a/library/cpp/actors/core/mon.h b/library/cpp/actors/core/mon.h index c450f2338e..3ebf6a0bed 100644 --- a/library/cpp/actors/core/mon.h +++ b/library/cpp/actors/core/mon.h @@ -123,7 +123,7 @@ namespace NActors { return true; } - static IEventBase* Load(TEventSerializedData* bufs) { + static IEventBase* Load(TEventSerializedData* bufs) { return new TEvRemoteHttpInfo(bufs->GetString()); } @@ -160,7 +160,7 @@ namespace NActors { return true; } - static IEventBase* Load(TEventSerializedData* bufs) { + static IEventBase* Load(TEventSerializedData* bufs) { return new TEvRemoteHttpInfoRes(bufs->GetString()); } }; @@ -192,7 +192,7 @@ namespace NActors { return true; } - static IEventBase* Load(TEventSerializedData* bufs) { + static IEventBase* Load(TEventSerializedData* bufs) { return new 
TEvRemoteJsonInfoRes(bufs->GetString()); } }; diff --git a/library/cpp/actors/core/mon_stats.h b/library/cpp/actors/core/mon_stats.h index d55552af0c..f1d66664b6 100644 --- a/library/cpp/actors/core/mon_stats.h +++ b/library/cpp/actors/core/mon_stats.h @@ -13,17 +13,17 @@ namespace NActors { inline void Add(ui64 val, ui64 inc = 1) { size_t ind = 0; -#if defined(__clang__) && __clang_major__ == 3 && __clang_minor__ == 7 +#if defined(__clang__) && __clang_major__ == 3 && __clang_minor__ == 7 asm volatile("" :: : "memory"); -#endif +#endif if (val > 1) { ind = GetValueBitCount(val - 1); } -#if defined(__clang__) && __clang_major__ == 3 && __clang_minor__ == 7 +#if defined(__clang__) && __clang_major__ == 3 && __clang_minor__ == 7 asm volatile("" :: : "memory"); -#endif +#endif RelaxedStore(&TotalSamples, RelaxedLoad(&TotalSamples) + inc); RelaxedStore(&Buckets[ind], RelaxedLoad(&Buckets[ind]) + inc); } diff --git a/library/cpp/actors/core/ya.make b/library/cpp/actors/core/ya.make index 880a9d00db..22155dbeec 100644 --- a/library/cpp/actors/core/ya.make +++ b/library/cpp/actors/core/ya.make @@ -32,8 +32,8 @@ SRCS( ask.h balancer.h balancer.cpp - buffer.cpp - buffer.h + buffer.cpp + buffer.h callstack.cpp callstack.h config.h @@ -45,7 +45,7 @@ SRCS( event.h event_load.h event_local.h - event_pb.cpp + event_pb.cpp event_pb.h events.h events_undelivered.cpp diff --git a/library/cpp/actors/dnscachelib/dnscache.cpp b/library/cpp/actors/dnscachelib/dnscache.cpp index 649339ddb2..580956c92e 100644 --- a/library/cpp/actors/dnscachelib/dnscache.cpp +++ b/library/cpp/actors/dnscachelib/dnscache.cpp @@ -155,19 +155,19 @@ void TDnsCache::GetStats(ui64& a_cache_hits, ui64& a_cache_misses, } bool TDnsCache::THost::IsStale(int family, const TDnsCache* ctx) const noexcept { - time_t resolved = family == AF_INET ? ResolvedV4 : ResolvedV6; - time_t notfound = family == AF_INET ? 
NotFoundV4 : NotFoundV6; - - if (TTimeKeeper::GetTime() - resolved < ctx->EntryLifetime) - return false; - - if (TTimeKeeper::GetTime() - notfound < ctx->NegativeLifetime) - return false; - - return true; -} - -const TDnsCache::THost& + time_t resolved = family == AF_INET ? ResolvedV4 : ResolvedV6; + time_t notfound = family == AF_INET ? NotFoundV4 : NotFoundV6; + + if (TTimeKeeper::GetTime() - resolved < ctx->EntryLifetime) + return false; + + if (TTimeKeeper::GetTime() - notfound < ctx->NegativeLifetime) + return false; + + return true; +} + +const TDnsCache::THost& TDnsCache::Resolve(const TString& hostname, int family, bool cacheOnly) { if (!ValidateHName(hostname)) { LWPROBE(ResolveNullHost, hostname, family); @@ -182,7 +182,7 @@ TDnsCache::Resolve(const TString& hostname, int family, bool cacheOnly) { TGuard<TMutex> lock(CacheMtx); p = HostCache.find(hostname); if (p != HostCache.end()) { - if (!p->second.IsStale(family, this)) { + if (!p->second.IsStale(family, this)) { /* Recently resolved, just return cached value */ ACacheHits += 1; THost& host = p->second; @@ -199,9 +199,9 @@ TDnsCache::Resolve(const TString& hostname, int family, bool cacheOnly) { ACacheMisses += 1; } - if (cacheOnly) - return NullHost; - + if (cacheOnly) + return NullHost; + TAtomic& inprogress = (family == AF_INET ? p->second.InProgressV4 : p->second.InProgressV6); { @@ -219,7 +219,7 @@ TDnsCache::Resolve(const TString& hostname, int family, bool cacheOnly) { ctx->Hostname = hostname; ctx->Family = family; - AtomicSet(inprogress, 1); + AtomicSet(inprogress, 1); ares_gethostbyname(chan, hostname.c_str(), family, &TDnsCache::GHBNCallback, ctx); } @@ -269,7 +269,7 @@ const TDnsCache::TAddr& TDnsCache::ResolveAddr(const in6_addr& addr, int family) ctx->Owner = this; ctx->Addr = addr; - AtomicSet(p->second.InProgress, 1); + AtomicSet(p->second.InProgress, 1); ares_gethostbyaddr(chan, &addr, family == AF_INET ? 
sizeof(in_addr) : sizeof(in6_addr), family, &TDnsCache::GHBACallback, ctx); @@ -284,7 +284,7 @@ const TDnsCache::TAddr& TDnsCache::ResolveAddr(const in6_addr& addr, int family) void TDnsCache::WaitTask(TAtomic& flag) { const TInstant start = TInstant(TTimeKeeper::GetTimeval()); - while (AtomicGet(flag)) { + while (AtomicGet(flag)) { ares_channel chan = static_cast<ares_channel>(Channel); struct pollfd pfd[ARES_GETSOCK_MAXNUM]; @@ -380,7 +380,7 @@ void TDnsCache::GHBNCallback(void* arg, int status, int, struct hostent* info) { */ p->second.ResolvedV4 = TTimeKeeper::GetTime(); p->second.ResolvedV4 = 0; - AtomicSet(p->second.InProgressV4, 0); + AtomicSet(p->second.InProgressV4, 0); } else if (info->h_addrtype == AF_INET6) { p->second.AddrsV6.clear(); for (int i = 0; info->h_addr_list[i] != nullptr; i++) { @@ -395,7 +395,7 @@ void TDnsCache::GHBNCallback(void* arg, int status, int, struct hostent* info) { notfound = TTimeKeeper::GetTime(); resolved = 0; } - AtomicSet(inprogress, 0); + AtomicSet(inprogress, 0); } void TDnsCache::GHBACallback(void* arg, int status, int, struct hostent* info) { @@ -413,7 +413,7 @@ void TDnsCache::GHBACallback(void* arg, int status, int, struct hostent* info) { p->second.NotFound = TTimeKeeper::GetTime(); p->second.Resolved = 0; } - AtomicSet(p->second.InProgress, 0); + AtomicSet(p->second.InProgress, 0); } TString TDnsCache::THost::AddrsV4ToString() const { @@ -441,5 +441,5 @@ TString TDnsCache::THost::AddrsV6ToString() const { } return ss.Str(); } - -TDnsCache::TAresLibInit TDnsCache::InitAresLib; + +TDnsCache::TAresLibInit TDnsCache::InitAresLib; diff --git a/library/cpp/actors/dnscachelib/dnscache.h b/library/cpp/actors/dnscachelib/dnscache.h index 3313a251a1..586957b9a0 100644 --- a/library/cpp/actors/dnscachelib/dnscache.h +++ b/library/cpp/actors/dnscachelib/dnscache.h @@ -1,6 +1,6 @@ #pragma once -#include <contrib/libs/c-ares/ares.h> +#include <contrib/libs/c-ares/ares.h> #include <util/generic/map.h> #include 
<util/generic/vector.h> #include <util/network/address.h> @@ -28,9 +28,9 @@ public: /* use with AF_INET, AF_INET6 or AF_UNSPEC */ NAddr::IRemoteAddrPtr GetAddr(const TString& host, - int family, - TIpPort port = 0, - bool cacheOnly = false); + int family, + TIpPort port = 0, + bool cacheOnly = false); void GetAllAddresses(const TString& host, TVector<NAddr::IRemoteAddrPtr>&); @@ -68,8 +68,8 @@ private: TString AddrsV4ToString() const; TString AddrsV6ToString() const; - - bool IsStale(int family, const TDnsCache* ctx) const noexcept; + + bool IsStale(int family, const TDnsCache* ctx) const noexcept; }; typedef TMap<TString, THost> THostCache; @@ -99,9 +99,9 @@ private: typedef TMap<in6_addr, TAddr, TAddrCmp> TAddrCache; const THost& Resolve(const TString&, int family, bool cacheOnly = false); - + const TAddr& ResolveAddr(const in6_addr&, int family); - + void WaitTask(TAtomic&); static void GHBNCallback(void* arg, int status, int timeouts, @@ -128,21 +128,21 @@ private: TMutex AresMtx; void* Channel; - - struct TAresLibInit { - TAresLibInit() { + + struct TAresLibInit { + TAresLibInit() { #ifdef _win_ - const auto res = ares_library_init(ARES_LIB_INIT_ALL); - Y_VERIFY(res == 0); + const auto res = ares_library_init(ARES_LIB_INIT_ALL); + Y_VERIFY(res == 0); #endif - } - - ~TAresLibInit() { + } + + ~TAresLibInit() { #ifdef _win_ - ares_library_cleanup(); + ares_library_cleanup(); #endif - } - }; - - static TAresLibInit InitAresLib; + } + }; + + static TAresLibInit InitAresLib; }; diff --git a/library/cpp/actors/memory_log/memlog.cpp b/library/cpp/actors/memory_log/memlog.cpp index 8e6b46727d..f20162db70 100644 --- a/library/cpp/actors/memory_log/memlog.cpp +++ b/library/cpp/actors/memory_log/memlog.cpp @@ -1,28 +1,28 @@ -#include "memlog.h" - +#include "memlog.h" + #include <library/cpp/actors/util/datetime.h> -#include <util/system/info.h> -#include <util/system/atomic.h> -#include <util/system/align.h> - -#include <contrib/libs/linuxvdso/interface.h> - -#if 
(defined(_i386_) || defined(_x86_64_)) && defined(_linux_) -#define HAVE_VDSO_GETCPU 1 -#include <contrib/libs/linuxvdso/interface.h> -static int (*FastGetCpu)(unsigned* cpu, unsigned* node, void* unused); -#endif - -#if defined(_unix_) +#include <util/system/info.h> +#include <util/system/atomic.h> +#include <util/system/align.h> + +#include <contrib/libs/linuxvdso/interface.h> + +#if (defined(_i386_) || defined(_x86_64_)) && defined(_linux_) +#define HAVE_VDSO_GETCPU 1 +#include <contrib/libs/linuxvdso/interface.h> +static int (*FastGetCpu)(unsigned* cpu, unsigned* node, void* unused); +#endif + +#if defined(_unix_) #include <sched.h> -#elif defined(_win_) +#elif defined(_win_) #include <WinBase.h> -#else +#else #error NO IMPLEMENTATION FOR THE PLATFORM -#endif - -const char TMemoryLog::DEFAULT_LAST_MARK[16] = { +#endif + +const char TMemoryLog::DEFAULT_LAST_MARK[16] = { 'c', 'b', '7', @@ -39,9 +39,9 @@ const char TMemoryLog::DEFAULT_LAST_MARK[16] = { '4', '5', '\n', -}; - -const char TMemoryLog::CLEAR_MARK[16] = { +}; + +const char TMemoryLog::CLEAR_MARK[16] = { ' ', ' ', ' ', @@ -58,146 +58,146 @@ const char TMemoryLog::CLEAR_MARK[16] = { ' ', ' ', '\n', -}; - -unsigned TMemoryLog::GetSelfCpu() noexcept { -#if defined(_unix_) +}; + +unsigned TMemoryLog::GetSelfCpu() noexcept { +#if defined(_unix_) #if HAVE_VDSO_GETCPU - unsigned cpu; - if (Y_LIKELY(FastGetCpu != nullptr)) { - auto result = FastGetCpu(&cpu, nullptr, nullptr); - Y_VERIFY(result == 0); + unsigned cpu; + if (Y_LIKELY(FastGetCpu != nullptr)) { + auto result = FastGetCpu(&cpu, nullptr, nullptr); + Y_VERIFY(result == 0); return cpu; - } else { - return 0; - } - + } else { + return 0; + } + #elif defined(_x86_64_) || defined(_i386_) - + #define CPUID(func, eax, ebx, ecx, edx) \ __asm__ __volatile__( \ "cpuid" \ : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) \ : "a"(func)); - - int a = 0, b = 0, c = 0, d = 0; - CPUID(0x1, a, b, c, d); - int acpiID = (b >> 24); - return acpiID; - + + int a = 0, b = 0, c = 
0, d = 0; + CPUID(0x1, a, b, c, d); + int acpiID = (b >> 24); + return acpiID; + #elif defined(__CNUC__) - return sched_getcpu(); + return sched_getcpu(); #else - return 0; + return 0; #endif - -#elif defined(_win_) - return GetCurrentProcessorNumber(); -#else - return 0; -#endif -} - -TMemoryLog* TMemoryLog::MemLogBuffer = nullptr; + +#elif defined(_win_) + return GetCurrentProcessorNumber(); +#else + return 0; +#endif +} + +TMemoryLog* TMemoryLog::MemLogBuffer = nullptr; Y_POD_THREAD(TThread::TId) TMemoryLog::LogThreadId; -char* TMemoryLog::LastMarkIsHere = nullptr; - -std::atomic<bool> TMemoryLog::PrintLastMark(true); - +char* TMemoryLog::LastMarkIsHere = nullptr; + +std::atomic<bool> TMemoryLog::PrintLastMark(true); + TMemoryLog::TMemoryLog(size_t totalSize, size_t grainSize) : GrainSize(grainSize) , FreeGrains(DEFAULT_TOTAL_SIZE / DEFAULT_GRAIN_SIZE * 2) , Buf(totalSize) -{ - Y_VERIFY(DEFAULT_TOTAL_SIZE % DEFAULT_GRAIN_SIZE == 0); - NumberOfGrains = DEFAULT_TOTAL_SIZE / DEFAULT_GRAIN_SIZE; - - for (size_t i = 0; i < NumberOfGrains; ++i) { - new (GetGrain(i)) TGrain; - } - - NumberOfCpus = NSystemInfo::NumberOfCpus(); - Y_VERIFY(NumberOfGrains > NumberOfCpus); - ActiveGrains.Reset(new TGrain*[NumberOfCpus]); - for (size_t i = 0; i < NumberOfCpus; ++i) { - ActiveGrains[i] = GetGrain(i); - } - - for (size_t i = NumberOfCpus; i < NumberOfGrains; ++i) { - FreeGrains.StubbornPush(GetGrain(i)); - } - -#if HAVE_VDSO_GETCPU - auto vdsoFunc = (decltype(FastGetCpu)) - NVdso::Function("__vdso_getcpu", "LINUX_2.6"); - AtomicSet(FastGetCpu, vdsoFunc); -#endif -} - -void* TMemoryLog::GetWriteBuffer(size_t amount) noexcept { - // alignment required by NoCacheMemcpy - amount = AlignUp<size_t>(amount, MemcpyAlignment); - - for (ui16 tries = MAX_GET_BUFFER_TRIES; tries-- > 0;) { - auto myCpu = GetSelfCpu(); - - TGrain* grain = AtomicGet(ActiveGrains[myCpu]); - - if (grain != nullptr) { - auto mine = AtomicGetAndAdd(grain->WritePointer, amount); - if (mine + amount <= GrainSize - 
sizeof(TGrain)) { - return &grain->Data[mine]; - } - - if (!AtomicCas(&ActiveGrains[myCpu], 0, grain)) { - continue; - } - - FreeGrains.StubbornPush(grain); - } - - grain = (TGrain*)FreeGrains.Pop(); - - if (grain == nullptr) { - return nullptr; - } - - grain->WritePointer = 0; - - if (!AtomicCas(&ActiveGrains[myCpu], grain, 0)) { - FreeGrains.StubbornPush(grain); - continue; - } - } - - return nullptr; -} - -void ClearAlignedTail(char* tail) noexcept { - auto aligned = AlignUp(tail, TMemoryLog::MemcpyAlignment); - if (aligned > tail) { - memset(tail, 0, aligned - tail); - } -} - -#if defined(_x86_64_) || defined(_i386_) -#include <xmmintrin.h> -// the main motivation is not poluting CPU cache -NO_SANITIZE_THREAD -void NoCacheMemcpy(char* dst, const char* src, size_t size) noexcept { - while (size >= sizeof(__m128) * 2) { - __m128 a = _mm_load_ps((float*)(src + 0 * sizeof(__m128))); - __m128 b = _mm_load_ps((float*)(src + 1 * sizeof(__m128))); - _mm_stream_ps((float*)(dst + 0 * sizeof(__m128)), a); - _mm_stream_ps((float*)(dst + 1 * sizeof(__m128)), b); - - size -= sizeof(__m128) * 2; - src += sizeof(__m128) * 2; - dst += sizeof(__m128) * 2; - } - memcpy(dst, src, size); -} +{ + Y_VERIFY(DEFAULT_TOTAL_SIZE % DEFAULT_GRAIN_SIZE == 0); + NumberOfGrains = DEFAULT_TOTAL_SIZE / DEFAULT_GRAIN_SIZE; + + for (size_t i = 0; i < NumberOfGrains; ++i) { + new (GetGrain(i)) TGrain; + } + + NumberOfCpus = NSystemInfo::NumberOfCpus(); + Y_VERIFY(NumberOfGrains > NumberOfCpus); + ActiveGrains.Reset(new TGrain*[NumberOfCpus]); + for (size_t i = 0; i < NumberOfCpus; ++i) { + ActiveGrains[i] = GetGrain(i); + } + + for (size_t i = NumberOfCpus; i < NumberOfGrains; ++i) { + FreeGrains.StubbornPush(GetGrain(i)); + } + +#if HAVE_VDSO_GETCPU + auto vdsoFunc = (decltype(FastGetCpu)) + NVdso::Function("__vdso_getcpu", "LINUX_2.6"); + AtomicSet(FastGetCpu, vdsoFunc); +#endif +} + +void* TMemoryLog::GetWriteBuffer(size_t amount) noexcept { + // alignment required by NoCacheMemcpy + amount = 
AlignUp<size_t>(amount, MemcpyAlignment); + + for (ui16 tries = MAX_GET_BUFFER_TRIES; tries-- > 0;) { + auto myCpu = GetSelfCpu(); + + TGrain* grain = AtomicGet(ActiveGrains[myCpu]); + + if (grain != nullptr) { + auto mine = AtomicGetAndAdd(grain->WritePointer, amount); + if (mine + amount <= GrainSize - sizeof(TGrain)) { + return &grain->Data[mine]; + } + + if (!AtomicCas(&ActiveGrains[myCpu], 0, grain)) { + continue; + } + + FreeGrains.StubbornPush(grain); + } + + grain = (TGrain*)FreeGrains.Pop(); + + if (grain == nullptr) { + return nullptr; + } + + grain->WritePointer = 0; + + if (!AtomicCas(&ActiveGrains[myCpu], grain, 0)) { + FreeGrains.StubbornPush(grain); + continue; + } + } + + return nullptr; +} + +void ClearAlignedTail(char* tail) noexcept { + auto aligned = AlignUp(tail, TMemoryLog::MemcpyAlignment); + if (aligned > tail) { + memset(tail, 0, aligned - tail); + } +} + +#if defined(_x86_64_) || defined(_i386_) +#include <xmmintrin.h> +// the main motivation is not poluting CPU cache +NO_SANITIZE_THREAD +void NoCacheMemcpy(char* dst, const char* src, size_t size) noexcept { + while (size >= sizeof(__m128) * 2) { + __m128 a = _mm_load_ps((float*)(src + 0 * sizeof(__m128))); + __m128 b = _mm_load_ps((float*)(src + 1 * sizeof(__m128))); + _mm_stream_ps((float*)(dst + 0 * sizeof(__m128)), a); + _mm_stream_ps((float*)(dst + 1 * sizeof(__m128)), b); + + size -= sizeof(__m128) * 2; + src += sizeof(__m128) * 2; + dst += sizeof(__m128) * 2; + } + memcpy(dst, src, size); +} NO_SANITIZE_THREAD void NoWCacheMemcpy(char* dst, const char* src, size_t size) noexcept { @@ -224,144 +224,144 @@ void NoWCacheMemcpy(char* dst, const char* src, size_t size) noexcept { } } -#endif - -NO_SANITIZE_THREAD -char* BareMemLogWrite(const char* begin, size_t msgSize, bool isLast) noexcept { - bool lastMark = - isLast && TMemoryLog::PrintLastMark.load(std::memory_order_acquire); - size_t amount = lastMark ? 
msgSize + TMemoryLog::LAST_MARK_SIZE : msgSize; - - char* buffer = (char*)TMemoryLog::GetWriteBufferStatic(amount); - if (buffer == nullptr) { - return nullptr; - } - -#if defined(_x86_64_) || defined(_i386_) - if (AlignDown(begin, TMemoryLog::MemcpyAlignment) == begin) { - NoCacheMemcpy(buffer, begin, msgSize); +#endif + +NO_SANITIZE_THREAD +char* BareMemLogWrite(const char* begin, size_t msgSize, bool isLast) noexcept { + bool lastMark = + isLast && TMemoryLog::PrintLastMark.load(std::memory_order_acquire); + size_t amount = lastMark ? msgSize + TMemoryLog::LAST_MARK_SIZE : msgSize; + + char* buffer = (char*)TMemoryLog::GetWriteBufferStatic(amount); + if (buffer == nullptr) { + return nullptr; + } + +#if defined(_x86_64_) || defined(_i386_) + if (AlignDown(begin, TMemoryLog::MemcpyAlignment) == begin) { + NoCacheMemcpy(buffer, begin, msgSize); } else { NoWCacheMemcpy(buffer, begin, msgSize); } #else memcpy(buffer, begin, msgSize); #endif - - if (lastMark) { - TMemoryLog::ChangeLastMark(buffer + msgSize); - } - - ClearAlignedTail(buffer + amount); - return buffer; -} - -NO_SANITIZE_THREAD -bool MemLogWrite(const char* begin, size_t msgSize, bool addLF) noexcept { - bool lastMark = TMemoryLog::PrintLastMark.load(std::memory_order_acquire); - size_t amount = lastMark ? msgSize + TMemoryLog::LAST_MARK_SIZE : msgSize; - - // Let's construct prolog with timestamp and thread id - auto threadId = TMemoryLog::GetTheadId(); - - // alignment required by NoCacheMemcpy - // check for format for snprintf - constexpr size_t prologSize = 48; + + if (lastMark) { + TMemoryLog::ChangeLastMark(buffer + msgSize); + } + + ClearAlignedTail(buffer + amount); + return buffer; +} + +NO_SANITIZE_THREAD +bool MemLogWrite(const char* begin, size_t msgSize, bool addLF) noexcept { + bool lastMark = TMemoryLog::PrintLastMark.load(std::memory_order_acquire); + size_t amount = lastMark ? 
msgSize + TMemoryLog::LAST_MARK_SIZE : msgSize; + + // Let's construct prolog with timestamp and thread id + auto threadId = TMemoryLog::GetTheadId(); + + // alignment required by NoCacheMemcpy + // check for format for snprintf + constexpr size_t prologSize = 48; alignas(TMemoryLog::MemcpyAlignment) char prolog[prologSize + 1]; Y_VERIFY(AlignDown(&prolog, TMemoryLog::MemcpyAlignment) == &prolog); - - int snprintfResult = snprintf(prolog, prologSize + 1, + + int snprintfResult = snprintf(prolog, prologSize + 1, "TS %020" PRIu64 " TI %020" PRIu64 " ", GetCycleCountFast(), threadId); - - if (snprintfResult < 0) { - return false; - } - Y_VERIFY(snprintfResult == prologSize); - - amount += prologSize; - if (addLF) { - ++amount; // add 1 byte for \n at the end of the message - } - - char* buffer = (char*)TMemoryLog::GetWriteBufferStatic(amount); - if (buffer == nullptr) { - return false; - } - -#if defined(_x86_64_) || defined(_i386_) + + if (snprintfResult < 0) { + return false; + } + Y_VERIFY(snprintfResult == prologSize); + + amount += prologSize; + if (addLF) { + ++amount; // add 1 byte for \n at the end of the message + } + + char* buffer = (char*)TMemoryLog::GetWriteBufferStatic(amount); + if (buffer == nullptr) { + return false; + } + +#if defined(_x86_64_) || defined(_i386_) // warning: copy prolog first to avoid corruption of the message // by prolog tail NoCacheMemcpy(buffer, prolog, prologSize); if (AlignDown(begin + prologSize, TMemoryLog::MemcpyAlignment) == begin + prologSize) { NoCacheMemcpy(buffer + prologSize, begin, msgSize); - } else { + } else { NoWCacheMemcpy(buffer + prologSize, begin, msgSize); } #else memcpy(buffer, prolog, prologSize); memcpy(buffer + prologSize, begin, msgSize); #endif - - if (addLF) { - buffer[prologSize + msgSize] = '\n'; - } - - if (lastMark) { - TMemoryLog::ChangeLastMark(buffer + prologSize + msgSize + (int)addLF); - } - - ClearAlignedTail(buffer + amount); - return true; -} - -NO_SANITIZE_THREAD -void 
TMemoryLog::ChangeLastMark(char* buffer) noexcept { - memcpy(buffer, DEFAULT_LAST_MARK, LAST_MARK_SIZE); - auto oldMark = AtomicSwap(&LastMarkIsHere, buffer); - if (Y_LIKELY(oldMark != nullptr)) { - memcpy(oldMark, CLEAR_MARK, LAST_MARK_SIZE); - } - if (AtomicGet(LastMarkIsHere) != buffer) { - memcpy(buffer, CLEAR_MARK, LAST_MARK_SIZE); - AtomicBarrier(); - } -} - -bool MemLogVPrintF(const char* format, va_list params) noexcept { - auto logger = TMemoryLog::GetMemoryLogger(); - if (logger == nullptr) { - return false; - } - - auto threadId = TMemoryLog::GetTheadId(); - - // alignment required by NoCacheMemcpy + + if (addLF) { + buffer[prologSize + msgSize] = '\n'; + } + + if (lastMark) { + TMemoryLog::ChangeLastMark(buffer + prologSize + msgSize + (int)addLF); + } + + ClearAlignedTail(buffer + amount); + return true; +} + +NO_SANITIZE_THREAD +void TMemoryLog::ChangeLastMark(char* buffer) noexcept { + memcpy(buffer, DEFAULT_LAST_MARK, LAST_MARK_SIZE); + auto oldMark = AtomicSwap(&LastMarkIsHere, buffer); + if (Y_LIKELY(oldMark != nullptr)) { + memcpy(oldMark, CLEAR_MARK, LAST_MARK_SIZE); + } + if (AtomicGet(LastMarkIsHere) != buffer) { + memcpy(buffer, CLEAR_MARK, LAST_MARK_SIZE); + AtomicBarrier(); + } +} + +bool MemLogVPrintF(const char* format, va_list params) noexcept { + auto logger = TMemoryLog::GetMemoryLogger(); + if (logger == nullptr) { + return false; + } + + auto threadId = TMemoryLog::GetTheadId(); + + // alignment required by NoCacheMemcpy alignas(TMemoryLog::MemcpyAlignment) char buf[TMemoryLog::MAX_MESSAGE_SIZE]; Y_VERIFY(AlignDown(&buf, TMemoryLog::MemcpyAlignment) == &buf); - + int prologSize = snprintf(buf, TMemoryLog::MAX_MESSAGE_SIZE - 2, "TS %020" PRIu64 " TI %020" PRIu64 " ", GetCycleCountFast(), threadId); - - if (Y_UNLIKELY(prologSize < 0)) { - return false; - } - Y_VERIFY((ui32)prologSize <= TMemoryLog::MAX_MESSAGE_SIZE); - - int add = vsnprintf( + + if (Y_UNLIKELY(prologSize < 0)) { + return false; + } + Y_VERIFY((ui32)prologSize <= 
TMemoryLog::MAX_MESSAGE_SIZE); + + int add = vsnprintf( &buf[prologSize], - TMemoryLog::MAX_MESSAGE_SIZE - prologSize - 2, - format, params); - - if (Y_UNLIKELY(add < 0)) { - return false; - } - Y_VERIFY(add >= 0); - auto totalSize = prologSize + add; - + TMemoryLog::MAX_MESSAGE_SIZE - prologSize - 2, + format, params); + + if (Y_UNLIKELY(add < 0)) { + return false; + } + Y_VERIFY(add >= 0); + auto totalSize = prologSize + add; + buf[totalSize++] = '\n'; - Y_VERIFY((ui32)totalSize <= TMemoryLog::MAX_MESSAGE_SIZE); - + Y_VERIFY((ui32)totalSize <= TMemoryLog::MAX_MESSAGE_SIZE); + return BareMemLogWrite(buf, totalSize) != nullptr; -} +} diff --git a/library/cpp/actors/memory_log/memlog.h b/library/cpp/actors/memory_log/memlog.h index 2aa27272a6..fe66efc4fb 100644 --- a/library/cpp/actors/memory_log/memlog.h +++ b/library/cpp/actors/memory_log/memlog.h @@ -1,211 +1,211 @@ -#pragma once - +#pragma once + #include <library/cpp/threading/queue/mpmc_unordered_ring.h> #include <util/generic/string.h> -#include <util/string/printf.h> -#include <util/system/datetime.h> -#include <util/system/thread.h> -#include <util/system/types.h> -#include <util/system/atomic.h> -#include <util/system/align.h> -#include <util/system/tls.h> - -#include <atomic> -#include <cstdio> - -#ifdef _win_ -#include <util/system/winint.h> -#endif - -#ifndef NO_SANITIZE_THREAD +#include <util/string/printf.h> +#include <util/system/datetime.h> +#include <util/system/thread.h> +#include <util/system/types.h> +#include <util/system/atomic.h> +#include <util/system/align.h> +#include <util/system/tls.h> + +#include <atomic> +#include <cstdio> + +#ifdef _win_ +#include <util/system/winint.h> +#endif + +#ifndef NO_SANITIZE_THREAD #define NO_SANITIZE_THREAD #if defined(__has_feature) #if __has_feature(thread_sanitizer) #undef NO_SANITIZE_THREAD #define NO_SANITIZE_THREAD __attribute__((no_sanitize_thread)) +#endif #endif #endif -#endif - -class TMemoryLog { -public: - static constexpr size_t 
DEFAULT_TOTAL_SIZE = 10 * 1024 * 1024; - static constexpr size_t DEFAULT_GRAIN_SIZE = 1024 * 64; - static constexpr size_t MAX_MESSAGE_SIZE = 1024; - static constexpr ui16 MAX_GET_BUFFER_TRIES = 4; - static constexpr ui16 MemcpyAlignment = 16; - - // search for cb7B68a8A561645 - static const char DEFAULT_LAST_MARK[16]; - static const char CLEAR_MARK[16]; - - static constexpr size_t LAST_MARK_SIZE = sizeof(DEFAULT_LAST_MARK); - - inline static TMemoryLog* GetMemoryLogger() noexcept { - return AtomicGet(MemLogBuffer); - } - + +class TMemoryLog { +public: + static constexpr size_t DEFAULT_TOTAL_SIZE = 10 * 1024 * 1024; + static constexpr size_t DEFAULT_GRAIN_SIZE = 1024 * 64; + static constexpr size_t MAX_MESSAGE_SIZE = 1024; + static constexpr ui16 MAX_GET_BUFFER_TRIES = 4; + static constexpr ui16 MemcpyAlignment = 16; + + // search for cb7B68a8A561645 + static const char DEFAULT_LAST_MARK[16]; + static const char CLEAR_MARK[16]; + + static constexpr size_t LAST_MARK_SIZE = sizeof(DEFAULT_LAST_MARK); + + inline static TMemoryLog* GetMemoryLogger() noexcept { + return AtomicGet(MemLogBuffer); + } + void* GetWriteBuffer(size_t amount) noexcept; - - inline static void* GetWriteBufferStatic(size_t amount) noexcept { - auto logger = GetMemoryLogger(); - if (logger == nullptr) { - return nullptr; - } - return logger->GetWriteBuffer(amount); - } - - size_t GetGlobalBufferSize() const noexcept { - return Buf.GetSize(); - } - - inline static void CreateMemoryLogBuffer( + + inline static void* GetWriteBufferStatic(size_t amount) noexcept { + auto logger = GetMemoryLogger(); + if (logger == nullptr) { + return nullptr; + } + return logger->GetWriteBuffer(amount); + } + + size_t GetGlobalBufferSize() const noexcept { + return Buf.GetSize(); + } + + inline static void CreateMemoryLogBuffer( size_t totalSize = DEFAULT_TOTAL_SIZE, size_t grainSize = DEFAULT_GRAIN_SIZE) Y_COLD { - if (AtomicGet(MemLogBuffer) != nullptr) { - return; - } - - AtomicSet(MemLogBuffer, new 
TMemoryLog(totalSize, grainSize)); - } - - static std::atomic<bool> PrintLastMark; - - // buffer must be at least 16 bytes + if (AtomicGet(MemLogBuffer) != nullptr) { + return; + } + + AtomicSet(MemLogBuffer, new TMemoryLog(totalSize, grainSize)); + } + + static std::atomic<bool> PrintLastMark; + + // buffer must be at least 16 bytes static void ChangeLastMark(char* buffer) noexcept; - - inline static TThread::TId GetTheadId() noexcept { - if (LogThreadId == 0) { - LogThreadId = TThread::CurrentThreadId(); - } - return LogThreadId; - } - -private: + + inline static TThread::TId GetTheadId() noexcept { + if (LogThreadId == 0) { + LogThreadId = TThread::CurrentThreadId(); + } + return LogThreadId; + } + +private: TMemoryLog(size_t totalSize, size_t grainSize) Y_COLD; - - struct TGrain { - TAtomic WritePointer = 0; - char Padding[MemcpyAlignment - sizeof(TAtomic)]; - char Data[]; - }; - - size_t NumberOfCpus; - size_t GrainSize; - size_t NumberOfGrains; - TArrayPtr<TGrain*> ActiveGrains; - NThreading::TMPMCUnorderedRing FreeGrains; - - TGrain* GetGrain(size_t grainIndex) const noexcept { - return (TGrain*)((char*)GetGlobalBuffer() + GrainSize * grainIndex); - } - - class TMMapArea { - public: - TMMapArea(size_t amount) Y_COLD { - MMap(amount); - } - - TMMapArea(const TMMapArea&) = delete; - TMMapArea& operator=(const TMMapArea& copy) = delete; - - TMMapArea(TMMapArea&& move) Y_COLD { - BufPtr = move.BufPtr; - Size = move.Size; - - move.BufPtr = nullptr; - move.Size = 0; - } - - TMMapArea& operator=(TMMapArea&& move) Y_COLD { - BufPtr = move.BufPtr; - Size = move.Size; - - move.BufPtr = nullptr; - move.Size = 0; - return *this; - } - - void Reset(size_t amount) Y_COLD { - MUnmap(); - MMap(amount); - } - - ~TMMapArea() noexcept Y_COLD { - MUnmap(); - } - - size_t GetSize() const noexcept { - return Size; - } - - void* GetPtr() const noexcept { - return BufPtr; - } - - private: - void* BufPtr; - size_t Size; -#ifdef _win_ - HANDLE Mapping; -#endif - - void MMap(size_t 
amount); - void MUnmap(); - }; - - TMMapArea Buf; - - void* GetGlobalBuffer() const noexcept { - return Buf.GetPtr(); - } - - static unsigned GetSelfCpu() noexcept; - - static TMemoryLog* MemLogBuffer; - static Y_POD_THREAD(TThread::TId) LogThreadId; - static char* LastMarkIsHere; -}; - -// it's no use of sanitizing this function -NO_SANITIZE_THREAD + + struct TGrain { + TAtomic WritePointer = 0; + char Padding[MemcpyAlignment - sizeof(TAtomic)]; + char Data[]; + }; + + size_t NumberOfCpus; + size_t GrainSize; + size_t NumberOfGrains; + TArrayPtr<TGrain*> ActiveGrains; + NThreading::TMPMCUnorderedRing FreeGrains; + + TGrain* GetGrain(size_t grainIndex) const noexcept { + return (TGrain*)((char*)GetGlobalBuffer() + GrainSize * grainIndex); + } + + class TMMapArea { + public: + TMMapArea(size_t amount) Y_COLD { + MMap(amount); + } + + TMMapArea(const TMMapArea&) = delete; + TMMapArea& operator=(const TMMapArea& copy) = delete; + + TMMapArea(TMMapArea&& move) Y_COLD { + BufPtr = move.BufPtr; + Size = move.Size; + + move.BufPtr = nullptr; + move.Size = 0; + } + + TMMapArea& operator=(TMMapArea&& move) Y_COLD { + BufPtr = move.BufPtr; + Size = move.Size; + + move.BufPtr = nullptr; + move.Size = 0; + return *this; + } + + void Reset(size_t amount) Y_COLD { + MUnmap(); + MMap(amount); + } + + ~TMMapArea() noexcept Y_COLD { + MUnmap(); + } + + size_t GetSize() const noexcept { + return Size; + } + + void* GetPtr() const noexcept { + return BufPtr; + } + + private: + void* BufPtr; + size_t Size; +#ifdef _win_ + HANDLE Mapping; +#endif + + void MMap(size_t amount); + void MUnmap(); + }; + + TMMapArea Buf; + + void* GetGlobalBuffer() const noexcept { + return Buf.GetPtr(); + } + + static unsigned GetSelfCpu() noexcept; + + static TMemoryLog* MemLogBuffer; + static Y_POD_THREAD(TThread::TId) LogThreadId; + static char* LastMarkIsHere; +}; + +// it's no use of sanitizing this function +NO_SANITIZE_THREAD char* BareMemLogWrite( - const char* begin, size_t msgSize, bool isLast = 
true) noexcept; - -// it's no use of sanitizing this function -NO_SANITIZE_THREAD + const char* begin, size_t msgSize, bool isLast = true) noexcept; + +// it's no use of sanitizing this function +NO_SANITIZE_THREAD bool MemLogWrite( - const char* begin, size_t msgSize, bool addLF = false) noexcept; - -Y_WRAPPER inline bool MemLogWrite(const char* begin, const char* end) noexcept { - if (end <= begin) { - return false; - } - - size_t msgSize = end - begin; - return MemLogWrite(begin, msgSize); -} - -template <typename TObj> -bool MemLogWriteStruct(const TObj* obj) noexcept { - auto begin = (const char*)(const void*)obj; - return MemLogWrite(begin, begin + sizeof(TObj)); -} - + const char* begin, size_t msgSize, bool addLF = false) noexcept; + +Y_WRAPPER inline bool MemLogWrite(const char* begin, const char* end) noexcept { + if (end <= begin) { + return false; + } + + size_t msgSize = end - begin; + return MemLogWrite(begin, msgSize); +} + +template <typename TObj> +bool MemLogWriteStruct(const TObj* obj) noexcept { + auto begin = (const char*)(const void*)obj; + return MemLogWrite(begin, begin + sizeof(TObj)); +} + Y_PRINTF_FORMAT(1, 0) -bool MemLogVPrintF(const char* format, va_list params) noexcept; - +bool MemLogVPrintF(const char* format, va_list params) noexcept; + Y_PRINTF_FORMAT(1, 2) Y_WRAPPER -inline bool MemLogPrintF(const char* format, ...) noexcept { - va_list params; - va_start(params, format); - auto result = MemLogVPrintF(format, params); - va_end(params); - return result; -} - -Y_WRAPPER inline bool MemLogWriteNullTerm(const char* str) noexcept { - return MemLogWrite(str, strlen(str)); -} +inline bool MemLogPrintF(const char* format, ...) 
noexcept { + va_list params; + va_start(params, format); + auto result = MemLogVPrintF(format, params); + va_end(params); + return result; +} + +Y_WRAPPER inline bool MemLogWriteNullTerm(const char* str) noexcept { + return MemLogWrite(str, strlen(str)); +} diff --git a/library/cpp/actors/memory_log/mmap.cpp b/library/cpp/actors/memory_log/mmap.cpp index 201998d343..b72feb1112 100644 --- a/library/cpp/actors/memory_log/mmap.cpp +++ b/library/cpp/actors/memory_log/mmap.cpp @@ -1,63 +1,63 @@ -#include "memlog.h" - +#include "memlog.h" + #if defined(_unix_) #include <sys/mman.h> #elif defined(_win_) #include <util/system/winint.h> -#else +#else #error NO IMPLEMENTATION FOR THE PLATFORM -#endif - -void TMemoryLog::TMMapArea::MMap(size_t amount) { - Y_VERIFY(amount > 0); - +#endif + +void TMemoryLog::TMMapArea::MMap(size_t amount) { + Y_VERIFY(amount > 0); + +#if defined(_unix_) + constexpr int mmapProt = PROT_READ | PROT_WRITE; +#if defined(_linux_) + constexpr int mmapFlags = MAP_PRIVATE | MAP_ANON | MAP_POPULATE; +#else + constexpr int mmapFlags = MAP_PRIVATE | MAP_ANON; +#endif + + BufPtr = ::mmap(nullptr, amount, mmapProt, mmapFlags, -1, 0); + if (BufPtr == MAP_FAILED) { + throw std::bad_alloc(); + } + +#elif defined(_win_) + Mapping = ::CreateFileMapping( + (HANDLE)-1, nullptr, PAGE_READWRITE, 0, amount, nullptr); + if (Mapping == NULL) { + throw std::bad_alloc(); + } + BufPtr = ::MapViewOfFile(Mapping, FILE_MAP_WRITE, 0, 0, amount); + if (BufPtr == NULL) { + throw std::bad_alloc(); + } +#endif + + Size = amount; +} + +void TMemoryLog::TMMapArea::MUnmap() { + if (BufPtr == nullptr) { + return; + } + #if defined(_unix_) - constexpr int mmapProt = PROT_READ | PROT_WRITE; -#if defined(_linux_) - constexpr int mmapFlags = MAP_PRIVATE | MAP_ANON | MAP_POPULATE; -#else - constexpr int mmapFlags = MAP_PRIVATE | MAP_ANON; -#endif - - BufPtr = ::mmap(nullptr, amount, mmapProt, mmapFlags, -1, 0); - if (BufPtr == MAP_FAILED) { - throw std::bad_alloc(); - } - -#elif 
defined(_win_) - Mapping = ::CreateFileMapping( - (HANDLE)-1, nullptr, PAGE_READWRITE, 0, amount, nullptr); - if (Mapping == NULL) { - throw std::bad_alloc(); - } - BufPtr = ::MapViewOfFile(Mapping, FILE_MAP_WRITE, 0, 0, amount); - if (BufPtr == NULL) { - throw std::bad_alloc(); - } -#endif - - Size = amount; -} - -void TMemoryLog::TMMapArea::MUnmap() { - if (BufPtr == nullptr) { - return; - } - -#if defined(_unix_) - int result = ::munmap(BufPtr, Size); - Y_VERIFY(result == 0); - -#elif defined(_win_) - BOOL result = ::UnmapViewOfFile(BufPtr); - Y_VERIFY(result != 0); - - result = ::CloseHandle(Mapping); - Y_VERIFY(result != 0); - - Mapping = 0; -#endif - - BufPtr = nullptr; - Size = 0; -} + int result = ::munmap(BufPtr, Size); + Y_VERIFY(result == 0); + +#elif defined(_win_) + BOOL result = ::UnmapViewOfFile(BufPtr); + Y_VERIFY(result != 0); + + result = ::CloseHandle(Mapping); + Y_VERIFY(result != 0); + + Mapping = 0; +#endif + + BufPtr = nullptr; + Size = 0; +} diff --git a/library/cpp/actors/memory_log/ya.make b/library/cpp/actors/memory_log/ya.make index d89d5db4d7..441b51b3c7 100644 --- a/library/cpp/actors/memory_log/ya.make +++ b/library/cpp/actors/memory_log/ya.make @@ -1,19 +1,19 @@ -LIBRARY() - +LIBRARY() + OWNER( agri g:kikimr ) - -SRCS( - memlog.cpp - memlog.h - mmap.cpp -) - -PEERDIR( + +SRCS( + memlog.cpp + memlog.h + mmap.cpp +) + +PEERDIR( library/cpp/threading/queue - contrib/libs/linuxvdso -) - -END() + contrib/libs/linuxvdso +) + +END() diff --git a/library/cpp/actors/prof/tag.cpp b/library/cpp/actors/prof/tag.cpp index 9ccf03e1a9..46b53d804f 100644 --- a/library/cpp/actors/prof/tag.cpp +++ b/library/cpp/actors/prof/tag.cpp @@ -1,6 +1,6 @@ -#include "tag.h" +#include "tag.h" #include "tcmalloc.h" - + #include <library/cpp/charset/ci_string.h> #include <library/cpp/containers/atomizer/atomizer.h> #include <library/cpp/malloc/api/malloc.h> @@ -13,9 +13,9 @@ #include <util/generic/singleton.h> #include <util/generic/string.h> #include 
<util/generic/vector.h> -#include <util/system/mutex.h> - -namespace NProfiling { +#include <util/system/mutex.h> + +namespace NProfiling { class TStringAtoms { private: TMutex Mutex; @@ -59,19 +59,19 @@ namespace NProfiling { } } }; - + ui32 MakeTag(const char* s) { return TStringAtoms::Instance().MakeTag(s); } - + ui32 MakeTags(const TVector<const char*>& ss) { return TStringAtoms::Instance().MakeTags(ss); } - + const char* GetTag(ui32 tag) { return TStringAtoms::Instance().GetTag(tag); - } - + } + size_t GetTagsCount() { return TStringAtoms::Instance().GetTagsCount(); } diff --git a/library/cpp/actors/prof/tag.h b/library/cpp/actors/prof/tag.h index 357e264a22..ec4bed5b08 100644 --- a/library/cpp/actors/prof/tag.h +++ b/library/cpp/actors/prof/tag.h @@ -1,22 +1,22 @@ -#pragma once - +#pragma once + #include <util/generic/fwd.h> - -/* - Common registry for tagging memory profiler. - Register a new tag with MakeTag using a unique string. + +/* + Common registry for tagging memory profiler. + Register a new tag with MakeTag using a unique string. Use registered tags with SetThreadAllocTag function in allocator API. -*/ - -namespace NProfiling { +*/ + +namespace NProfiling { ui32 MakeTag(const char* s); - + // Make only unique tags. Y_VERIFY inside. 
ui32 MakeTags(const TVector<const char*>& ss); - + const char* GetTag(ui32 tag); size_t GetTagsCount(); - + using TSetThreadAllocTag = ui32(ui32 tag); extern TSetThreadAllocTag* SetThreadAllocTag; @@ -31,32 +31,32 @@ namespace NProfiling { ui32 newTag = MakeTag(tagName); RestoreTag = SetThreadAllocTag(newTag); } - + TMemoryTagScope(TMemoryTagScope&& move) : RestoreTag(move.RestoreTag) , Released(move.Released) { move.Released = true; } - + TMemoryTagScope& operator=(TMemoryTagScope&& move) { RestoreTag = move.RestoreTag; Released = move.Released; move.Released = true; return *this; } - + static void Reset(ui32 tag) { SetThreadAllocTag(tag); - } - + } + void Release() { if (!Released) { SetThreadAllocTag(RestoreTag); Released = true; } } - + ~TMemoryTagScope() { if (!Released) { SetThreadAllocTag(RestoreTag); diff --git a/library/cpp/actors/prof/ut/tag_ut.cpp b/library/cpp/actors/prof/ut/tag_ut.cpp index accf3921ab..43c56ecddc 100644 --- a/library/cpp/actors/prof/ut/tag_ut.cpp +++ b/library/cpp/actors/prof/ut/tag_ut.cpp @@ -1,68 +1,68 @@ -#include "tag.h" - +#include "tag.h" + #include <library/cpp/testing/unittest/registar.h> + +using namespace NProfiling; + +class TAtomTagsTest: public TTestBase { +private: + UNIT_TEST_SUITE(TAtomTagsTest); + UNIT_TEST(Test_MakeTag); + UNIT_TEST(Test_Make2Tags); + UNIT_TEST(Test_MakeTagTwice); + + UNIT_TEST(Test_MakeAndGetTag); + + UNIT_TEST(Test_MakeVector); + UNIT_TEST_SUITE_END(); -using namespace NProfiling; - -class TAtomTagsTest: public TTestBase { -private: - UNIT_TEST_SUITE(TAtomTagsTest); - UNIT_TEST(Test_MakeTag); - UNIT_TEST(Test_Make2Tags); - UNIT_TEST(Test_MakeTagTwice); - - UNIT_TEST(Test_MakeAndGetTag); - - UNIT_TEST(Test_MakeVector); - UNIT_TEST_SUITE_END(); - -public: - void Test_MakeTag(); - void Test_Make2Tags(); - void Test_MakeTagTwice(); - void Test_MakeAndGetTag(); - void Test_MakeVector(); -}; - -UNIT_TEST_SUITE_REGISTRATION(TAtomTagsTest); - -void TAtomTagsTest::Test_MakeTag() { - ui32 tag = MakeTag("a 
tag"); - UNIT_ASSERT(tag != 0); -} - -void TAtomTagsTest::Test_Make2Tags() { - ui32 tag1 = MakeTag("a tag 1"); - ui32 tag2 = MakeTag("a tag 2"); - UNIT_ASSERT(tag1 != 0); - UNIT_ASSERT(tag2 != 0); - UNIT_ASSERT(tag1 != tag2); -} - -void TAtomTagsTest::Test_MakeTagTwice() { - ui32 tag1 = MakeTag("a tag twice"); - ui32 tag2 = MakeTag("a tag twice"); - UNIT_ASSERT(tag1 != 0); - UNIT_ASSERT(tag1 == tag2); -} - -void TAtomTagsTest::Test_MakeAndGetTag() { - const char* makeStr = "tag to get"; - ui32 tag = MakeTag(makeStr); - const char* tagStr = GetTag(tag); - UNIT_ASSERT_STRINGS_EQUAL(makeStr, tagStr); -} - -void TAtomTagsTest::Test_MakeVector() { +public: + void Test_MakeTag(); + void Test_Make2Tags(); + void Test_MakeTagTwice(); + void Test_MakeAndGetTag(); + void Test_MakeVector(); +}; + +UNIT_TEST_SUITE_REGISTRATION(TAtomTagsTest); + +void TAtomTagsTest::Test_MakeTag() { + ui32 tag = MakeTag("a tag"); + UNIT_ASSERT(tag != 0); +} + +void TAtomTagsTest::Test_Make2Tags() { + ui32 tag1 = MakeTag("a tag 1"); + ui32 tag2 = MakeTag("a tag 2"); + UNIT_ASSERT(tag1 != 0); + UNIT_ASSERT(tag2 != 0); + UNIT_ASSERT(tag1 != tag2); +} + +void TAtomTagsTest::Test_MakeTagTwice() { + ui32 tag1 = MakeTag("a tag twice"); + ui32 tag2 = MakeTag("a tag twice"); + UNIT_ASSERT(tag1 != 0); + UNIT_ASSERT(tag1 == tag2); +} + +void TAtomTagsTest::Test_MakeAndGetTag() { + const char* makeStr = "tag to get"; + ui32 tag = MakeTag(makeStr); + const char* tagStr = GetTag(tag); + UNIT_ASSERT_STRINGS_EQUAL(makeStr, tagStr); +} + +void TAtomTagsTest::Test_MakeVector() { TVector<const char*> strs = { - "vector tag 0", - "vector tag 1", - "vector tag 3", + "vector tag 0", + "vector tag 1", + "vector tag 3", "vector tag 4"}; - ui32 baseTag = MakeTags(strs); - UNIT_ASSERT(baseTag != 0); - for (ui32 i = 0; i < strs.size(); ++i) { - const char* str = GetTag(baseTag + i); - UNIT_ASSERT_STRINGS_EQUAL(str, strs[i]); - } -} + ui32 baseTag = MakeTags(strs); + UNIT_ASSERT(baseTag != 0); + for (ui32 i = 0; i < 
strs.size(); ++i) { + const char* str = GetTag(baseTag + i); + UNIT_ASSERT_STRINGS_EQUAL(str, strs[i]); + } +} diff --git a/library/cpp/actors/prof/ut/ya.make b/library/cpp/actors/prof/ut/ya.make index 47c58a8fb7..d177fbdd22 100644 --- a/library/cpp/actors/prof/ut/ya.make +++ b/library/cpp/actors/prof/ut/ya.make @@ -1,12 +1,12 @@ UNITTEST_FOR(library/cpp/actors/prof) - + OWNER( agri g:kikimr ) - -SRCS( - tag_ut.cpp -) - -END() + +SRCS( + tag_ut.cpp +) + +END() diff --git a/library/cpp/actors/prof/ya.make b/library/cpp/actors/prof/ya.make index b5e2497563..cdd3e57d1f 100644 --- a/library/cpp/actors/prof/ya.make +++ b/library/cpp/actors/prof/ya.make @@ -1,19 +1,19 @@ -LIBRARY() - +LIBRARY() + OWNER( agri g:kikimr ) - -SRCS( - tag.cpp -) - -PEERDIR( + +SRCS( + tag.cpp +) + +PEERDIR( library/cpp/charset library/cpp/containers/atomizer -) - +) + IF (PROFILE_MEMORY_ALLOCATIONS) CFLAGS(-DPROFILE_MEMORY_ALLOCATIONS) PEERDIR( @@ -30,4 +30,4 @@ ELSE() SRCS(tcmalloc_null.cpp) ENDIF() -END() +END() diff --git a/library/cpp/actors/protos/actors.proto b/library/cpp/actors/protos/actors.proto index 5fbd6d44ee..5e40cbf6c2 100644 --- a/library/cpp/actors/protos/actors.proto +++ b/library/cpp/actors/protos/actors.proto @@ -6,8 +6,8 @@ message TActorId { required fixed64 RawX1 = 1; required fixed64 RawX2 = 2; } - -message TCallbackException { + +message TCallbackException { required TActorId ActorId = 1; - required string ExceptionMessage = 2; -} + required string ExceptionMessage = 2; +} diff --git a/library/cpp/actors/protos/interconnect.proto b/library/cpp/actors/protos/interconnect.proto index 2e3b0d0d15..30a5c1bb74 100644 --- a/library/cpp/actors/protos/interconnect.proto +++ b/library/cpp/actors/protos/interconnect.proto @@ -14,7 +14,7 @@ message TEvNodeInfo { optional string Address = 2; optional uint32 Port = 3; } - + extend google.protobuf.FieldOptions { optional string PrintName = 50376; } @@ -43,19 +43,19 @@ message TScopeId { optional fixed64 X2 = 2; } -message 
THandshakeRequest { - required uint64 Protocol = 1; - - required uint64 ProgramPID = 2; - required uint64 ProgramStartTime = 3; - required uint64 Serial = 4; - - required uint32 ReceiverNodeId = 5; +message THandshakeRequest { + required uint64 Protocol = 1; + + required uint64 ProgramPID = 2; + required uint64 ProgramStartTime = 3; + required uint64 Serial = 4; + + required uint32 ReceiverNodeId = 5; required string SenderActorId = 6; - - optional string SenderHostName = 7; - optional string ReceiverHostName = 8; - optional string UUID = 9; + + optional string SenderHostName = 7; + optional string ReceiverHostName = 8; + optional string UUID = 9; optional TClusterUUIDs ClusterUUIDs = 13; optional bytes Ballast = 10; @@ -72,15 +72,15 @@ message THandshakeRequest { optional bool RequestModernFrame = 18; optional bool RequestAuthOnly = 19; -} - -message THandshakeSuccess { - required uint64 Protocol = 1; - - required uint64 ProgramPID = 2; - required uint64 ProgramStartTime = 3; - required uint64 Serial = 4; - +} + +message THandshakeSuccess { + required uint64 Protocol = 1; + + required uint64 ProgramPID = 2; + required uint64 ProgramStartTime = 3; + required uint64 Serial = 4; + required string SenderActorId = 5; optional string VersionTag = 6; @@ -94,13 +94,13 @@ message THandshakeSuccess { optional bool UseModernFrame = 11; optional bool AuthOnly = 12; -} - -message THandshakeReply { - optional THandshakeSuccess Success = 1; - optional string ErrorExplaination = 2; +} + +message THandshakeReply { + optional THandshakeSuccess Success = 1; + optional string ErrorExplaination = 2; optional bool CookieCheckResult = 3; -} +} message TEvLoadMessage { message THop { diff --git a/library/cpp/actors/protos/services_common.proto b/library/cpp/actors/protos/services_common.proto index afa0ec0073..99347ad37e 100644 --- a/library/cpp/actors/protos/services_common.proto +++ b/library/cpp/actors/protos/services_common.proto @@ -7,8 +7,8 @@ enum EServiceCommon { GLOBAL = 0; 
INTERCONNECT = 1; - TEST = 2; - PROTOCOLS = 3; + TEST = 2; + PROTOCOLS = 3; INTERCONNECT_SPEED_TEST = 4; INTERCONNECT_STATUS = 5; INTERCONNECT_NETWORK = 6; diff --git a/library/cpp/actors/protos/unittests.proto b/library/cpp/actors/protos/unittests.proto index a856b0942a..68b662b9b3 100644 --- a/library/cpp/actors/protos/unittests.proto +++ b/library/cpp/actors/protos/unittests.proto @@ -1,17 +1,17 @@ option cc_enable_arenas = true; -message TSimple { - required string Str1 = 1; - optional string Str2 = 2; - optional uint64 Number1 = 3; -} - -message TBigMessage { - repeated TSimple Simples = 1; - repeated string ManyStr = 2; - optional string OneMoreStr = 3; - optional uint64 YANumber = 4; -} +message TSimple { + required string Str1 = 1; + optional string Str2 = 2; + optional uint64 Number1 = 3; +} + +message TBigMessage { + repeated TSimple Simples = 1; + repeated string ManyStr = 2; + optional string OneMoreStr = 3; + optional uint64 YANumber = 4; +} message TMessageWithPayload { optional string Meta = 1; diff --git a/library/cpp/actors/testlib/test_runtime.cpp b/library/cpp/actors/testlib/test_runtime.cpp index 6fa25b9965..0459f76386 100644 --- a/library/cpp/actors/testlib/test_runtime.cpp +++ b/library/cpp/actors/testlib/test_runtime.cpp @@ -74,7 +74,7 @@ namespace NActors { ActorSystem->Stop(); ActorSystem.Destroy(); - Poller.Reset(); + Poller.Reset(); } TTestActorRuntimeBase::TNodeDataBase::~TNodeDataBase() { @@ -909,17 +909,17 @@ namespace NActors { case TMailboxType::Revolving: UnlockFromExecution((TMailboxTable::TRevolvingMailbox *)mailbox, node->ExecutorPools[0], false, hint, MaxWorkers, ++revolvingCounter); break; - case TMailboxType::HTSwap: + case TMailboxType::HTSwap: UnlockFromExecution((TMailboxTable::THTSwapMailbox *)mailbox, node->ExecutorPools[0], false, hint, MaxWorkers, ++revolvingCounter); - break; - case TMailboxType::ReadAsFilled: + break; + case TMailboxType::ReadAsFilled: UnlockFromExecution((TMailboxTable::TReadAsFilledMailbox 
*)mailbox, node->ExecutorPools[0], false, hint, MaxWorkers, ++revolvingCounter); - break; - case TMailboxType::TinyReadAsFilled: + break; + case TMailboxType::TinyReadAsFilled: UnlockFromExecution((TMailboxTable::TTinyReadAsFilledMailbox *)mailbox, node->ExecutorPools[0], false, hint, MaxWorkers, ++revolvingCounter); - break; + break; default: - Y_FAIL("Unsupported mailbox type"); + Y_FAIL("Unsupported mailbox type"); } return actorId; @@ -1645,13 +1645,13 @@ namespace NActors { setup->LocalServices = node->LocalServices; setup->Interconnect.ProxyActors.resize(FirstNodeId + NodeCount); const TActorId nameserviceId = GetNameserviceActorId(); - - TIntrusivePtr<TInterconnectProxyCommon> common; - common.Reset(new TInterconnectProxyCommon); - common->NameserviceId = nameserviceId; - common->MonCounters = interconnectCounters; + + TIntrusivePtr<TInterconnectProxyCommon> common; + common.Reset(new TInterconnectProxyCommon); + common->NameserviceId = nameserviceId; + common->MonCounters = interconnectCounters; common->TechnicalSelfHostName = "::1"; - + if (!UseRealThreads) { common->Settings.DeadPeer = TDuration::Max(); common->Settings.CloseOnIdle = TDuration::Max(); @@ -1668,7 +1668,7 @@ namespace NActors { continue; const ui32 peerNodeId = FirstNodeId + proxyNodeIndex; - + IActor *proxyActor = UseRealInterconnect ? 
new TInterconnectProxyTCP(peerNodeId, common) : InterconnectMock.CreateProxyMock(setup->NodeId, peerNodeId, common); diff --git a/library/cpp/actors/testlib/test_runtime.h b/library/cpp/actors/testlib/test_runtime.h index 26e3b45c98..cca5876645 100644 --- a/library/cpp/actors/testlib/test_runtime.h +++ b/library/cpp/actors/testlib/test_runtime.h @@ -556,7 +556,7 @@ namespace NActors { TIntrusivePtr<NMonitoring::TDynamicCounters> DynamicCounters; TIntrusivePtr<NActors::NLog::TSettings> LogSettings; - TIntrusivePtr<NInterconnect::TPollerThreads> Poller; + TIntrusivePtr<NInterconnect::TPollerThreads> Poller; volatile ui64* ActorSystemTimestamp; volatile ui64* ActorSystemMonotonic; TVector<std::pair<TActorId, TActorSetupCmd> > LocalServices; diff --git a/library/cpp/actors/util/funnel_queue.h b/library/cpp/actors/util/funnel_queue.h index 0e21e2617c..d760252054 100644 --- a/library/cpp/actors/util/funnel_queue.h +++ b/library/cpp/actors/util/funnel_queue.h @@ -91,62 +91,62 @@ protected: delete entry; return next; } - -protected: - struct TEntryIter { - TEntry* ptr; - - ElementType& operator*() { - return ptr->Data; - } - - ElementType* operator->() { - return &ptr->Data; - } - - TEntryIter& operator++() { - ptr = AtomicGet(ptr->Next); - return *this; - } - - bool operator!=(const TEntryIter& other) const { - return ptr != other.ptr; - } - - bool operator==(const TEntryIter& other) const { - return ptr == other.ptr; - } - }; - - struct TConstEntryIter { - const TEntry* ptr; - - const ElementType& operator*() { - return ptr->Data; - } - - const ElementType* operator->() { - return &ptr->Data; - } - - TEntryIter& operator++() { - ptr = AtomicGet(ptr->Next); - return *this; - } - - bool operator!=(const TConstEntryIter& other) const { - return ptr != other.ptr; - } - - bool operator==(const TConstEntryIter& other) const { - return ptr == other.ptr; - } - }; - -public: - using const_iterator = TConstEntryIter; - using iterator = TEntryIter; - + +protected: + struct 
TEntryIter { + TEntry* ptr; + + ElementType& operator*() { + return ptr->Data; + } + + ElementType* operator->() { + return &ptr->Data; + } + + TEntryIter& operator++() { + ptr = AtomicGet(ptr->Next); + return *this; + } + + bool operator!=(const TEntryIter& other) const { + return ptr != other.ptr; + } + + bool operator==(const TEntryIter& other) const { + return ptr == other.ptr; + } + }; + + struct TConstEntryIter { + const TEntry* ptr; + + const ElementType& operator*() { + return ptr->Data; + } + + const ElementType* operator->() { + return &ptr->Data; + } + + TEntryIter& operator++() { + ptr = AtomicGet(ptr->Next); + return *this; + } + + bool operator!=(const TConstEntryIter& other) const { + return ptr != other.ptr; + } + + bool operator==(const TConstEntryIter& other) const { + return ptr == other.ptr; + } + }; + +public: + using const_iterator = TConstEntryIter; + using iterator = TEntryIter; + iterator begin() { return {AtomicGet(Front)}; } @@ -156,7 +156,7 @@ public: const_iterator begin() const { return {AtomicGet(Front)}; } - + iterator end() { return {nullptr}; } diff --git a/library/cpp/actors/util/recentwnd.h b/library/cpp/actors/util/recentwnd.h index ba1ede6f29..29425301e4 100644 --- a/library/cpp/actors/util/recentwnd.h +++ b/library/cpp/actors/util/recentwnd.h @@ -1,28 +1,28 @@ -#pragma once +#pragma once -#include <util/generic/deque.h> - -template <typename TElem, +#include <util/generic/deque.h> + +template <typename TElem, template <typename, typename...> class TContainer = TDeque> -class TRecentWnd { -public: +class TRecentWnd { +public: TRecentWnd(ui32 wndSize) : MaxWndSize_(wndSize) { } - - void Push(const TElem& elem) { - if (Window_.size() == MaxWndSize_) - Window_.erase(Window_.begin()); - Window_.emplace_back(elem); - } - - void Push(TElem&& elem) { - if (Window_.size() == MaxWndSize_) - Window_.erase(Window_.begin()); - Window_.emplace_back(std::move(elem)); - } - + + void Push(const TElem& elem) { + if (Window_.size() == 
MaxWndSize_) + Window_.erase(Window_.begin()); + Window_.emplace_back(elem); + } + + void Push(TElem&& elem) { + if (Window_.size() == MaxWndSize_) + Window_.erase(Window_.begin()); + Window_.emplace_back(std::move(elem)); + } + TElem& Last() { return Window_.back(); } @@ -35,33 +35,33 @@ public: ui64 Size() const { return Window_.size(); } - - using const_iterator = typename TContainer<TElem>::const_iterator; - + + using const_iterator = typename TContainer<TElem>::const_iterator; + const_iterator begin() { return Window_.begin(); } const_iterator end() { return Window_.end(); } + + void Reset(ui32 wndSize = 0) { + Window_.clear(); + if (wndSize != 0) { + MaxWndSize_ = wndSize; + } + } + + void ResetWnd(ui32 wndSize) { + Y_VERIFY(wndSize != 0); + MaxWndSize_ = wndSize; + if (Window_.size() > MaxWndSize_) { + Window_.erase(Window_.begin(), + Window_.begin() + Window_.size() - MaxWndSize_); + } + } - void Reset(ui32 wndSize = 0) { - Window_.clear(); - if (wndSize != 0) { - MaxWndSize_ = wndSize; - } - } - - void ResetWnd(ui32 wndSize) { - Y_VERIFY(wndSize != 0); - MaxWndSize_ = wndSize; - if (Window_.size() > MaxWndSize_) { - Window_.erase(Window_.begin(), - Window_.begin() + Window_.size() - MaxWndSize_); - } - } - -private: - TContainer<TElem> Window_; +private: + TContainer<TElem> Window_; ui32 MaxWndSize_; -}; +}; diff --git a/library/cpp/actors/util/thread.h b/library/cpp/actors/util/thread.h index d742c8c585..d90ab745fe 100644 --- a/library/cpp/actors/util/thread.h +++ b/library/cpp/actors/util/thread.h @@ -10,17 +10,17 @@ inline void SetCurrentThreadName(const TString& name, const ui32 maxCharsFromProcessName = 8) { #if defined(_linux_) - // linux limits threadname by 15 + \0 - - TStringBuf procName(GetExecPath()); - procName = procName.RNextTok('/'); - procName = procName.SubStr(0, maxCharsFromProcessName); - + // linux limits threadname by 15 + \0 + + TStringBuf procName(GetExecPath()); + procName = procName.RNextTok('/'); + procName = procName.SubStr(0, 
maxCharsFromProcessName); + TStringStream linuxName; - linuxName << procName << "." << name; + linuxName << procName << "." << name; TThread::SetCurrentThreadName(linuxName.Str().data()); #else - Y_UNUSED(maxCharsFromProcessName); + Y_UNUSED(maxCharsFromProcessName); TThread::SetCurrentThreadName(name.data()); #endif } diff --git a/library/cpp/balloc/balloc.cpp b/library/cpp/balloc/balloc.cpp index fab489db4c..ea37357c13 100644 --- a/library/cpp/balloc/balloc.cpp +++ b/library/cpp/balloc/balloc.cpp @@ -32,21 +32,21 @@ namespace NBalloc { static void Y_FORCE_INLINE Free(void* ptr) { if (ptr == nullptr) { return; - } - TAllocHeader* allocHeader = ((TAllocHeader*)ptr) - 1; - size_t size = allocHeader->AllocSize; + } + TAllocHeader* allocHeader = ((TAllocHeader*)ptr) - 1; + size_t size = allocHeader->AllocSize; const size_t signature = size & SIGNATURE_MASK; if (Y_LIKELY(signature == ALIVE_SIGNATURE)) { - allocHeader->AllocSize = 0; // abort later on double free + allocHeader->AllocSize = 0; // abort later on double free #ifdef DBG_FILL_MEMORY memset(ptr, 0xde, size - signature); #endif - FreeRaw(allocHeader->Block); + FreeRaw(allocHeader->Block); if (NAllocStats::IsEnabled()) { NAllocStats::DecThreadAllocStats(size - signature); } } else if (signature == DISABLED_SIGNATURE) { - LibcFree(allocHeader->Block); + LibcFree(allocHeader->Block); } else { NMalloc::AbortFromCorruptedAllocator(); } diff --git a/library/cpp/http/io/stream.cpp b/library/cpp/http/io/stream.cpp index 6689be684f..083a531b31 100644 --- a/library/cpp/http/io/stream.cpp +++ b/library/cpp/http/io/stream.cpp @@ -145,7 +145,7 @@ public: , HasContentLength_(false) , ContentLength_(0) , ContentEncoded_(false) - , Expect100Continue_(false) + , Expect100Continue_(false) { BuildInputChain(); Y_ASSERT(Input_); @@ -204,10 +204,10 @@ public: return HasContentLength_ || ChunkedInput_; } - inline bool HasExpect100Continue() const noexcept { - return Expect100Continue_; - } - + inline bool HasExpect100Continue() 
const noexcept { + return Expect100Continue_; + } + private: template <class Operation> inline size_t Perform(size_t len, const Operation& operation) { @@ -324,14 +324,14 @@ private: } } [[fallthrough]]; - HEADERCMP(header, "expect") { - auto findContinue = [&](const TStringBuf& s) { + HEADERCMP(header, "expect") { + auto findContinue = [&](const TStringBuf& s) { if (strnicmp(s.data(), "100-continue", 13) == 0) { - Expect100Continue_ = true; - } - }; - ForEach(header.Value(), findContinue); - } + Expect100Continue_ = true; + } + }; + ForEach(header.Value(), findContinue); + } break; } } @@ -386,7 +386,7 @@ private: ui64 ContentLength_; bool ContentEncoded_; - bool Expect100Continue_; + bool Expect100Continue_; }; THttpInput::THttpInput(IInputStream* slave) @@ -452,10 +452,10 @@ bool THttpInput::HasContent() const noexcept { return Impl_->HasContent(); } -bool THttpInput::HasExpect100Continue() const noexcept { - return Impl_->HasExpect100Continue(); -} - +bool THttpInput::HasExpect100Continue() const noexcept { + return Impl_->HasExpect100Continue(); +} + class THttpOutput::TImpl { class TSizeCalculator: public IOutputStream { public: @@ -512,11 +512,11 @@ public: inline ~TImpl() { } - inline void SendContinue() { - Output_->Write("HTTP/1.1 100 Continue\r\n\r\n"); - Output_->Flush(); - } - + inline void SendContinue() { + Output_->Write("HTTP/1.1 100 Continue\r\n\r\n"); + Output_->Flush(); + } + inline void Write(const void* buf, size_t len) { if (Finished_) { ythrow THttpException() << "can not write to finished stream"; @@ -954,10 +954,10 @@ bool THttpOutput::CanBeKeepAlive() const noexcept { return Impl_->CanBeKeepAlive(); } -void THttpOutput::SendContinue() { - Impl_->SendContinue(); -} - +void THttpOutput::SendContinue() { + Impl_->SendContinue(); +} + const TString& THttpOutput::FirstLine() const noexcept { return Impl_->FirstLine(); } diff --git a/library/cpp/http/io/stream.h b/library/cpp/http/io/stream.h index 78ca4fc814..e0846ef107 100644 --- 
a/library/cpp/http/io/stream.h +++ b/library/cpp/http/io/stream.h @@ -84,8 +84,8 @@ public: /// Returns true if Content-Length or Transfer-Encoding header received bool HasContent() const noexcept; - bool HasExpect100Continue() const noexcept; - + bool HasExpect100Continue() const noexcept; + private: size_t DoRead(void* buf, size_t len) override; size_t DoSkip(size_t len) override; @@ -145,8 +145,8 @@ public: /// не завершается после окончания транзакции. bool CanBeKeepAlive() const noexcept; - void SendContinue(); - + void SendContinue(); + /* * first line - response or request */ diff --git a/library/cpp/http/server/http.cpp b/library/cpp/http/server/http.cpp index 128583bdd7..a1b70f10e1 100644 --- a/library/cpp/http/server/http.cpp +++ b/library/cpp/http/server/http.cpp @@ -67,7 +67,7 @@ public: THttpServer::TImpl* HttpServ_ = nullptr; bool Reject_ = false; TInstant LastUsed; - TInstant AcceptMoment; + TInstant AcceptMoment; size_t ReceivedRequests = 0; }; @@ -300,7 +300,7 @@ public: ~TListenSocket() override { } - void OnPollEvent(TInstant) override { + void OnPollEvent(TInstant) override { SOCKET s = ::accept(S_, nullptr, nullptr); if (s == INVALID_SOCKET) { @@ -589,7 +589,7 @@ void TClientConnection::OnPollEvent(TInstant now) { } THolder<TClientRequest> obj(HttpServ_->CreateRequest(this_)); - AcceptMoment = now; + AcceptMoment = now; HttpServ_->AddRequest(obj, Reject_); } @@ -776,10 +776,10 @@ NAddr::IRemoteAddrRef TClientRequest::GetListenerSockAddrRef() const noexcept { return Conn_->ListenerSockAddrRef_; } -TInstant TClientRequest::AcceptMoment() const noexcept { - return Conn_->AcceptMoment; -} - +TInstant TClientRequest::AcceptMoment() const noexcept { + return Conn_->AcceptMoment; +} + /* * TRequestReplier */ diff --git a/library/cpp/http/server/http.h b/library/cpp/http/server/http.h index b292d38f27..0b1607bfbb 100644 --- a/library/cpp/http/server/http.h +++ b/library/cpp/http/server/http.h @@ -8,7 +8,7 @@ #include <util/memory/blob.h> #include 
<util/generic/ptr.h> #include <util/generic/vector.h> -#include <util/system/atomic.h> +#include <util/system/atomic.h> class IThreadFactory; class TClientRequest; @@ -90,8 +90,8 @@ public: const IThreadPool& GetRequestQueue() const; const IThreadPool& GetFailQueue() const; - static TAtomicBase AcceptReturnsInvalidSocketCounter(); - + static TAtomicBase AcceptReturnsInvalidSocketCounter(); + private: bool MaxRequestsReached() const; @@ -120,7 +120,7 @@ public: THttpServer* HttpServ() const noexcept; const TSocket& Socket() const noexcept; NAddr::IRemoteAddrRef GetListenerSockAddrRef() const noexcept; - TInstant AcceptMoment() const noexcept; + TInstant AcceptMoment() const noexcept; bool IsLocal() const; bool CheckLoopback(); diff --git a/library/cpp/http/server/http_ex.cpp b/library/cpp/http/server/http_ex.cpp index e07db22bfc..7fb6378482 100644 --- a/library/cpp/http/server/http_ex.cpp +++ b/library/cpp/http/server/http_ex.cpp @@ -27,45 +27,45 @@ bool THttpClientRequestExtension::ProcessHeaders(TBaseServerRequestData& rd, TBl char* s = RequestString.begin(); - enum EMethod { - NotImplemented, - Get, - Post, - Put, + enum EMethod { + NotImplemented, + Get, + Post, + Put, Patch, Delete, - }; - - enum EMethod foundMethod; - char* urlStart; - + }; + + enum EMethod foundMethod; + char* urlStart; + if (strnicmp(s, "GET ", 4) == 0) { - foundMethod = Get; - urlStart = s + 4; - } else if (strnicmp(s, "POST ", 5) == 0) { - foundMethod = Post; - urlStart = s + 5; - } else if (strnicmp(s, "PUT ", 4) == 0) { - foundMethod = Put; - urlStart = s + 4; + foundMethod = Get; + urlStart = s + 4; + } else if (strnicmp(s, "POST ", 5) == 0) { + foundMethod = Post; + urlStart = s + 5; + } else if (strnicmp(s, "PUT ", 4) == 0) { + foundMethod = Put; + urlStart = s + 4; } else if (strnicmp(s, "PATCH ", 6) == 0) { foundMethod = Patch; urlStart = s + 6; } else if (strnicmp(s, "DELETE ", 7) == 0) { foundMethod = Delete; urlStart = s + 7; - } else { - foundMethod = NotImplemented; - } - - 
switch (foundMethod) { + } else { + foundMethod = NotImplemented; + } + + switch (foundMethod) { case Get: case Delete: if (!Parse(urlStart, rd)) { return false; } break; - + case Post: case Put: case Patch: @@ -91,8 +91,8 @@ bool THttpClientRequestExtension::ProcessHeaders(TBaseServerRequestData& rd, TBl } catch (...) { Output() << "HTTP/1.1 400 Bad request\r\n\r\n"; return false; - } - + } + if (!Parse(urlStart, rd)) { return false; } diff --git a/library/cpp/http/server/http_ut.cpp b/library/cpp/http/server/http_ut.cpp index cc62bb988e..4e0e6bd69d 100644 --- a/library/cpp/http/server/http_ut.cpp +++ b/library/cpp/http/server/http_ut.cpp @@ -137,7 +137,7 @@ Y_UNIT_TEST_SUITE(THttpServerTest) { }; static const TString CrLf = "\r\n"; - + struct TTestRequest { TTestRequest(ui16 port, TString content = TString()) : Port(port) @@ -145,23 +145,23 @@ Y_UNIT_TEST_SUITE(THttpServerTest) { { } - void CheckContinue(TSocketInput& si) { - if (Expect100Continue) { - TStringStream ss; + void CheckContinue(TSocketInput& si) { + if (Expect100Continue) { + TStringStream ss; TString firstLine; - si.ReadLine(firstLine); - for (;;) { + si.ReadLine(firstLine); + for (;;) { TString buf; - si.ReadLine(buf); + si.ReadLine(buf); if (buf.size() == 0) { - break; - } - ss << buf << CrLf; - } - UNIT_ASSERT_EQUAL(firstLine, "HTTP/1.1 100 Continue"); - } - } - + break; + } + ss << buf << CrLf; + } + UNIT_ASSERT_EQUAL(firstLine, "HTTP/1.1 100 Continue"); + } + } + TString Execute() { TSocket* s = nullptr; THolder<TSocket> singleReqSocket; @@ -176,7 +176,7 @@ Y_UNIT_TEST_SUITE(THttpServerTest) { s = singleReqSocket.Get(); } bool isPost = Type == "POST"; - TSocketInput si(*s); + TSocketInput si(*s); if (UseHttpOutput) { TSocketOutput so(*s); @@ -194,21 +194,21 @@ Y_UNIT_TEST_SUITE(THttpServerTest) { } else { r << "Transfer-Encoding: chunked" << CrLf; } - if (Expect100Continue) { - r << "Expect: 100-continue" << CrLf; - } + if (Expect100Continue) { + r << "Expect: 100-continue" << CrLf; + } } r << 
CrLf; if (isPost) { - output.Write(r.Str()); - output.Flush(); - CheckContinue(si); - output.Write(Content); - output.Finish(); - } else { - output.Write(r.Str()); - output.Finish(); + output.Write(r.Str()); + output.Flush(); + CheckContinue(si); + output.Write(Content); + output.Finish(); + } else { + output.Write(r.Str()); + output.Finish(); } } else { TStringStream r; @@ -222,9 +222,9 @@ Y_UNIT_TEST_SUITE(THttpServerTest) { if (EnableResponseEncoding) { r << "Accept-Encoding: gzip, deflate, x-gzip, x-deflate, y-lzo, y-lzf, y-lzq, y-bzip2, y-lzma" << CrLf; } - if (isPost && Expect100Continue) { - r << "Expect: 100-continue" << CrLf; - } + if (isPost && Expect100Continue) { + r << "Expect: 100-continue" << CrLf; + } if (isPost && ContentEncoding.size() && Content.size()) { r << "Content-Encoding: " << ContentEncoding << CrLf; TStringStream compressedContent; @@ -237,7 +237,7 @@ Y_UNIT_TEST_SUITE(THttpServerTest) { r << "Content-Length: " << compressedContent.Size() << CrLf; r << CrLf; s->Send(r.Data(), r.Size()); - CheckContinue(si); + CheckContinue(si); Hdr = r.Str(); TString tosend = compressedContent.Str(); s->Send(tosend.data(), tosend.size()); @@ -246,7 +246,7 @@ Y_UNIT_TEST_SUITE(THttpServerTest) { r << "Content-Length: " << Content.size() << CrLf; r << CrLf; s->Send(r.Data(), r.Size()); - CheckContinue(si); + CheckContinue(si); Hdr = r.Str(); s->Send(Content.data(), Content.size()); } else { @@ -286,7 +286,7 @@ Y_UNIT_TEST_SUITE(THttpServerTest) { THolder<TSocket> KeepAlivedSocket; bool EnableResponseEncoding = false; TString Hdr; - bool Expect100Continue = false; + bool Expect100Continue = false; }; class TFailingMtpQueue: public TSimpleThreadPool { @@ -354,10 +354,10 @@ Y_UNIT_TEST_SUITE(THttpServerTest) { r.ContentEncoding = encoder; for (bool expect100Continue : trueFalse) { - r.Expect100Continue = expect100Continue; + r.Expect100Continue = expect100Continue; TString resp = r.Execute(); - UNIT_ASSERT_C(resp == res, "diff echo response for request:\n" + 
r.GetDescription()); - } + UNIT_ASSERT_C(resp == res, "diff echo response for request:\n" + r.GetDescription()); + } } } } diff --git a/library/cpp/lfalloc/lf_allocX64.h b/library/cpp/lfalloc/lf_allocX64.h index fd2a906d6f..850e3a0b4f 100644 --- a/library/cpp/lfalloc/lf_allocX64.h +++ b/library/cpp/lfalloc/lf_allocX64.h @@ -666,7 +666,7 @@ class TLFAllocFreeList { TNode* volatile Head; TNode* volatile Pending; - TAtomic PendingToFreeListCounter; + TAtomic PendingToFreeListCounter; TAtomic AllocCount; void* Padding; @@ -682,10 +682,10 @@ class TLFAllocFreeList { TNode* res; for (res = Head; res; res = Head) { TNode* keepNext = res->Next; - if (DoCas(&Head, keepNext, res) == res) { - //Y_VERIFY(keepNext == res->Next); + if (DoCas(&Head, keepNext, res) == res) { + //Y_VERIFY(keepNext == res->Next); break; - } + } } return res; } @@ -712,19 +712,19 @@ public: Enqueue(&Pending, newFree); } Y_FORCE_INLINE void* Alloc() { - TAtomic keepCounter = AtomicAdd(PendingToFreeListCounter, 0); + TAtomic keepCounter = AtomicAdd(PendingToFreeListCounter, 0); TNode* fl = Pending; if (AtomicAdd(AllocCount, 1) == 1) { - // No other allocs in progress. - // If (keepCounter == PendingToFreeListCounter) then Pending was not freed by other threads. - // Hence Pending is not used in any concurrent DoAlloc() atm and can be safely moved to FreeList + // No other allocs in progress. + // If (keepCounter == PendingToFreeListCounter) then Pending was not freed by other threads. 
+ // Hence Pending is not used in any concurrent DoAlloc() atm and can be safely moved to FreeList if (fl && keepCounter == AtomicAdd(PendingToFreeListCounter, 0) && DoCas(&Pending, (TNode*)nullptr, fl) == fl) { // pick first element from Pending and return it void* res = fl; fl = fl->Next; // if there are other elements in Pending list, add them to main free list FreeList(fl); - AtomicAdd(PendingToFreeListCounter, 1); + AtomicAdd(PendingToFreeListCounter, 1); AtomicAdd(AllocCount, -1); return res; } @@ -1308,7 +1308,7 @@ static void AllocThreadInfo() { struct TAllocHeader { uint64_t Size; - int Tag; + int Tag; int Cookie; }; @@ -1331,7 +1331,7 @@ static inline TAllocHeader* GetAllocHeader(void* p) { PERTHREAD int AllocationTag; extern "C" int SetThreadAllocTag(int tag) { int prevTag = AllocationTag; - if (tag < DBG_ALLOC_MAX_TAG && tag >= 0) { + if (tag < DBG_ALLOC_MAX_TAG && tag >= 0) { AllocationTag = tag; } return prevTag; @@ -1417,7 +1417,7 @@ static inline void SampleDeallocation(TAllocHeader* p, int sizeIdx) { } static inline void TrackPerTagAllocation(TAllocHeader* p, int sizeIdx) { - if (p->Tag < DBG_ALLOC_MAX_TAG && p->Tag >= 0) { + if (p->Tag < DBG_ALLOC_MAX_TAG && p->Tag >= 0) { Y_ASSERT_NOBT(sizeIdx < DBG_ALLOC_NUM_SIZES); auto& global = GlobalPerTagAllocCounters[p->Tag][sizeIdx]; @@ -1432,7 +1432,7 @@ static inline void TrackPerTagAllocation(TAllocHeader* p, int sizeIdx) { } static inline void TrackPerTagDeallocation(TAllocHeader* p, int sizeIdx) { - if (p->Tag < DBG_ALLOC_MAX_TAG && p->Tag >= 0) { + if (p->Tag < DBG_ALLOC_MAX_TAG && p->Tag >= 0) { Y_ASSERT_NOBT(sizeIdx < DBG_ALLOC_NUM_SIZES); auto& global = GlobalPerTagAllocCounters[p->Tag][sizeIdx]; @@ -1609,10 +1609,10 @@ static Y_FORCE_INLINE void LFFree(void* p) { return; } -#if defined(LFALLOC_DBG) - TrackDeallocation(p, nSizeIdx); -#endif - +#if defined(LFALLOC_DBG) + TrackDeallocation(p, nSizeIdx); +#endif + #ifdef DBG_FILL_MEMORY memset(p, 0xfe, nSizeIdxToSize[nSizeIdx]); #endif diff --git 
a/library/cpp/sse/sse.h b/library/cpp/sse/sse.h index 19bac17de0..918a942803 100644 --- a/library/cpp/sse/sse.h +++ b/library/cpp/sse/sse.h @@ -1,18 +1,18 @@ -#pragma once - -/* - The header chooses appropriate SSE support. - On Intel: SSE intrinsics - On ARM64: translation to NEON intrinsics or software emulation +#pragma once + +/* + The header chooses appropriate SSE support. + On Intel: SSE intrinsics + On ARM64: translation to NEON intrinsics or software emulation On PowerPc: translation to Altivec intrinsics or software emulation -*/ +*/ /* Author: Vitaliy Manushkin <agri@yandex-team.ru>, Danila Kutenin <danlark@yandex-team.ru> */ - -#include <util/system/platform.h> + +#include <util/system/platform.h> #if (defined(_i386_) || defined(_x86_64_)) && defined(_sse_) -#include <xmmintrin.h> -#include <emmintrin.h> +#include <xmmintrin.h> +#include <emmintrin.h> #include <pmmintrin.h> #define ARCADIA_SSE #if defined(_ssse3_) @@ -24,10 +24,10 @@ #if defined(_sse4_2_) #include <nmmintrin.h> #endif -#elif defined(_arm64_) -#include "sse2neon.h" +#elif defined(_arm64_) +#include "sse2neon.h" #define ARCADIA_SSE #elif defined(_ppc64_) #include "powerpc.h" #define ARCADIA_SSE -#endif +#endif diff --git a/library/cpp/sse/sse2neon.h b/library/cpp/sse/sse2neon.h index 695dbd3041..af7f3ed242 100644 --- a/library/cpp/sse/sse2neon.h +++ b/library/cpp/sse/sse2neon.h @@ -1,60 +1,60 @@ -#pragma once - -/* - The header contains inlining code - which translates SSE intrinsics to NEON intrinsics or software emulation. - You are encouraged for commitments. - Add missing intrinsics, add unittests, purify the implementation, - merge and simplify templates. - Warning: The code is made in deep nights, so it surely contains bugs, - imperfections, flaws and all other kinds of errors and mistakes. 
-*/ -/* Author: Vitaliy Manushkin <agri@yandex-team.ru> */ - -#include <util/system/platform.h> +#pragma once + +/* + The header contains inlining code + which translates SSE intrinsics to NEON intrinsics or software emulation. + You are encouraged for commitments. + Add missing intrinsics, add unittests, purify the implementation, + merge and simplify templates. + Warning: The code is made in deep nights, so it surely contains bugs, + imperfections, flaws and all other kinds of errors and mistakes. +*/ +/* Author: Vitaliy Manushkin <agri@yandex-team.ru> */ + +#include <util/system/platform.h> #include <util/system/compiler.h> -#include <util/system/types.h> - -#if !defined(_arm64_) -#error "This header is for ARM64 (aarch64) platform only. " \ +#include <util/system/types.h> + +#if !defined(_arm64_) +#error "This header is for ARM64 (aarch64) platform only. " \ "Include sse.h instead of including this header directly." -#endif - -#include <arm_neon.h> - -union __m128i { - uint64x2_t AsUi64x2; - int64x2_t AsSi64x2; +#endif + +#include <arm_neon.h> + +union __m128i { + uint64x2_t AsUi64x2; + int64x2_t AsSi64x2; + + uint32x4_t AsUi32x4; + int32x4_t AsSi32x4; + + uint16x8_t AsUi16x8; + int16x8_t AsSi16x8; + + uint8x16_t AsUi8x16; + int8x16_t AsSi8x16; + + float32x4_t AsFloat32x4; + float64x2_t AsFloat64x2; +}; + +union __m128 { + float32x4_t AsFloat32x4; + float64x2_t AsFloat64x2; uint32x4_t AsUi32x4; int32x4_t AsSi32x4; - uint16x8_t AsUi16x8; - int16x8_t AsSi16x8; + uint64x2_t AsUi64x2; + int64x2_t AsSi64x2; - uint8x16_t AsUi8x16; - int8x16_t AsSi8x16; - - float32x4_t AsFloat32x4; - float64x2_t AsFloat64x2; -}; - -union __m128 { - float32x4_t AsFloat32x4; - float64x2_t AsFloat64x2; - - uint32x4_t AsUi32x4; - int32x4_t AsSi32x4; - - uint64x2_t AsUi64x2; - int64x2_t AsSi64x2; - - uint8x16_t AsUi8x16; + uint8x16_t AsUi8x16; int8x16_t AsSi8x16; __m128i As128i; -}; - +}; + typedef float64x2_t __m128d; enum _mm_hint @@ -72,128 +72,128 @@ Y_FORCE_INLINE void 
_mm_prefetch(const void *p, enum _mm_hint) { __builtin_prefetch(p); } -template <typename TType> -struct TQType; - -template <> -struct TQType<uint8x16_t> { - static inline uint8x16_t& As(__m128i& value) { - return value.AsUi8x16; - } - static inline const uint8x16_t& As(const __m128i& value) { - return value.AsUi8x16; - } -}; - -template <> -struct TQType<int8x16_t> { - static inline int8x16_t& As(__m128i& value) { - return value.AsSi8x16; - } - static inline const int8x16_t& As(const __m128i& value) { - return value.AsSi8x16; - } -}; - -template <> -struct TQType<uint16x8_t> { - static inline uint16x8_t& As(__m128i& value) { - return value.AsUi16x8; - } - static inline const uint16x8_t& As(const __m128i& value) { - return value.AsUi16x8; - } -}; - -template <> -struct TQType<int16x8_t> { - static inline int16x8_t& As(__m128i& value) { - return value.AsSi16x8; - } - static inline const int16x8_t& As(const __m128i& value) { - return value.AsSi16x8; - } -}; - -template <> -struct TQType<uint32x4_t> { - static inline uint32x4_t& As(__m128i& value) { - return value.AsUi32x4; - } - static inline const uint32x4_t& As(const __m128i& value) { - return value.AsUi32x4; - } -}; - -template <> -struct TQType<int32x4_t> { - static inline int32x4_t& As(__m128i& value) { - return value.AsSi32x4; - } - static inline const int32x4_t& As(const __m128i& value) { - return value.AsSi32x4; - } -}; - -template <> -struct TQType<uint64x2_t> { - static inline uint64x2_t& As(__m128i& value) { - return value.AsUi64x2; - } - static inline const uint64x2_t& As(const __m128i& value) { - return value.AsUi64x2; - } - static inline uint64x2_t& As(__m128& value) { - return value.AsUi64x2; - } - static inline const uint64x2_t& As(const __m128& value) { - return value.AsUi64x2; - } -}; - -template <> -struct TQType<int64x2_t> { - static inline int64x2_t& As(__m128i& value) { - return value.AsSi64x2; - } - static inline const int64x2_t& As(const __m128i& value) { - return value.AsSi64x2; - } -}; - 
-template <typename TValue> -struct TBaseWrapper { - TValue Value; - +template <typename TType> +struct TQType; + +template <> +struct TQType<uint8x16_t> { + static inline uint8x16_t& As(__m128i& value) { + return value.AsUi8x16; + } + static inline const uint8x16_t& As(const __m128i& value) { + return value.AsUi8x16; + } +}; + +template <> +struct TQType<int8x16_t> { + static inline int8x16_t& As(__m128i& value) { + return value.AsSi8x16; + } + static inline const int8x16_t& As(const __m128i& value) { + return value.AsSi8x16; + } +}; + +template <> +struct TQType<uint16x8_t> { + static inline uint16x8_t& As(__m128i& value) { + return value.AsUi16x8; + } + static inline const uint16x8_t& As(const __m128i& value) { + return value.AsUi16x8; + } +}; + +template <> +struct TQType<int16x8_t> { + static inline int16x8_t& As(__m128i& value) { + return value.AsSi16x8; + } + static inline const int16x8_t& As(const __m128i& value) { + return value.AsSi16x8; + } +}; + +template <> +struct TQType<uint32x4_t> { + static inline uint32x4_t& As(__m128i& value) { + return value.AsUi32x4; + } + static inline const uint32x4_t& As(const __m128i& value) { + return value.AsUi32x4; + } +}; + +template <> +struct TQType<int32x4_t> { + static inline int32x4_t& As(__m128i& value) { + return value.AsSi32x4; + } + static inline const int32x4_t& As(const __m128i& value) { + return value.AsSi32x4; + } +}; + +template <> +struct TQType<uint64x2_t> { + static inline uint64x2_t& As(__m128i& value) { + return value.AsUi64x2; + } + static inline const uint64x2_t& As(const __m128i& value) { + return value.AsUi64x2; + } + static inline uint64x2_t& As(__m128& value) { + return value.AsUi64x2; + } + static inline const uint64x2_t& As(const __m128& value) { + return value.AsUi64x2; + } +}; + +template <> +struct TQType<int64x2_t> { + static inline int64x2_t& As(__m128i& value) { + return value.AsSi64x2; + } + static inline const int64x2_t& As(const __m128i& value) { + return value.AsSi64x2; + } +}; + 
+template <typename TValue> +struct TBaseWrapper { + TValue Value; + Y_FORCE_INLINE - operator TValue&() { - return Value; - } - + operator TValue&() { + return Value; + } + Y_FORCE_INLINE - operator const TValue&() const { - return Value; - } -}; - -template <typename TOp, typename TFunc, TFunc* func, - typename TDup, TDup* dupfunc> -struct TWrapperSingleDup: public TBaseWrapper<__m128i> { + operator const TValue&() const { + return Value; + } +}; + +template <typename TOp, typename TFunc, TFunc* func, + typename TDup, TDup* dupfunc> +struct TWrapperSingleDup: public TBaseWrapper<__m128i> { Y_FORCE_INLINE - TWrapperSingleDup(const __m128i& op, const int shift) { - TQType<TOp>::As(Value) = func(TQType<TOp>::As(op), dupfunc(shift)); - } -}; - -template <typename TOp, typename TFunc, TFunc* func, - typename TDup, TDup* dupfunc> -struct TWrapperSingleNegDup: public TBaseWrapper<__m128i> { + TWrapperSingleDup(const __m128i& op, const int shift) { + TQType<TOp>::As(Value) = func(TQType<TOp>::As(op), dupfunc(shift)); + } +}; + +template <typename TOp, typename TFunc, TFunc* func, + typename TDup, TDup* dupfunc> +struct TWrapperSingleNegDup: public TBaseWrapper<__m128i> { Y_FORCE_INLINE - TWrapperSingleNegDup(const __m128i& op, const int shift) { - TQType<TOp>::As(Value) = func(TQType<TOp>::As(op), dupfunc(-shift)); - } -}; - + TWrapperSingleNegDup(const __m128i& op, const int shift) { + TQType<TOp>::As(Value) = func(TQType<TOp>::As(op), dupfunc(-shift)); + } +}; + inline __m128i _mm_srl_epi16(__m128i a, __m128i count) { __m128i res; res.AsUi16x8 = vshlq_u16(a.AsUi16x8, vdupq_n_s16(-count.AsUi16x8[0])); @@ -225,16 +225,16 @@ inline __m128i _mm_srai_epi32(__m128i a, int count) { return res; } -using _mm_srli_epi16 = - TWrapperSingleNegDup<uint16x8_t, decltype(vshlq_u16), vshlq_u16, - decltype(vdupq_n_s16), vdupq_n_s16>; -using _mm_srli_epi32 = - TWrapperSingleNegDup<uint32x4_t, decltype(vshlq_u32), vshlq_u32, - decltype(vdupq_n_s32), vdupq_n_s32>; -using _mm_srli_epi64 = - 
TWrapperSingleNegDup<uint64x2_t, decltype(vshlq_u64), vshlq_u64, - decltype(vdupq_n_s64), vdupq_n_s64>; - +using _mm_srli_epi16 = + TWrapperSingleNegDup<uint16x8_t, decltype(vshlq_u16), vshlq_u16, + decltype(vdupq_n_s16), vdupq_n_s16>; +using _mm_srli_epi32 = + TWrapperSingleNegDup<uint32x4_t, decltype(vshlq_u32), vshlq_u32, + decltype(vdupq_n_s32), vdupq_n_s32>; +using _mm_srli_epi64 = + TWrapperSingleNegDup<uint64x2_t, decltype(vshlq_u64), vshlq_u64, + decltype(vdupq_n_s64), vdupq_n_s64>; + inline __m128i _mm_sll_epi16(__m128i a, __m128i count) { __m128i res; @@ -255,57 +255,57 @@ inline __m128i _mm_sll_epi64(__m128i a, __m128i count) { return res; } -using _mm_slli_epi16 = - TWrapperSingleDup<uint16x8_t, decltype(vshlq_u16), vshlq_u16, - decltype(vdupq_n_s16), vdupq_n_s16>; -using _mm_slli_epi32 = - TWrapperSingleDup<uint32x4_t, decltype(vshlq_u32), vshlq_u32, - decltype(vdupq_n_s32), vdupq_n_s32>; -using _mm_slli_epi64 = - TWrapperSingleDup<uint64x2_t, decltype(vshlq_u64), vshlq_u64, - decltype(vdupq_n_s64), vdupq_n_s64>; - -template <typename TOp, typename TFunc, TFunc* func, typename... TParams> -struct TWrapperDual : TBaseWrapper<__m128i> { +using _mm_slli_epi16 = + TWrapperSingleDup<uint16x8_t, decltype(vshlq_u16), vshlq_u16, + decltype(vdupq_n_s16), vdupq_n_s16>; +using _mm_slli_epi32 = + TWrapperSingleDup<uint32x4_t, decltype(vshlq_u32), vshlq_u32, + decltype(vdupq_n_s32), vdupq_n_s32>; +using _mm_slli_epi64 = + TWrapperSingleDup<uint64x2_t, decltype(vshlq_u64), vshlq_u64, + decltype(vdupq_n_s64), vdupq_n_s64>; + +template <typename TOp, typename TFunc, TFunc* func, typename... TParams> +struct TWrapperDual : TBaseWrapper<__m128i> { Y_FORCE_INLINE - TWrapperDual(const __m128i& op1, const __m128i& op2, TParams... params) { - TQType<TOp>::As(Value) = (TOp) - func(TQType<TOp>::As(op1), - TQType<TOp>::As(op2), - params...); - } -}; - -template <typename TOp, typename TFunc, TFunc* func, typename... 
TParams> -struct TWrapperDualSwap : TBaseWrapper<__m128i> { + TWrapperDual(const __m128i& op1, const __m128i& op2, TParams... params) { + TQType<TOp>::As(Value) = (TOp) + func(TQType<TOp>::As(op1), + TQType<TOp>::As(op2), + params...); + } +}; + +template <typename TOp, typename TFunc, TFunc* func, typename... TParams> +struct TWrapperDualSwap : TBaseWrapper<__m128i> { Y_FORCE_INLINE - TWrapperDualSwap(const __m128i& op1, const __m128i& op2, TParams... params) { - TQType<TOp>::As(Value) = - func(TQType<TOp>::As(op2), - TQType<TOp>::As(op1), - params...); - } -}; - + TWrapperDualSwap(const __m128i& op1, const __m128i& op2, TParams... params) { + TQType<TOp>::As(Value) = + func(TQType<TOp>::As(op2), + TQType<TOp>::As(op1), + params...); + } +}; + template <typename TOp, typename TFunc, TFunc* func, typename TArgument = __m128> struct TWrapperDualF : TBaseWrapper<TArgument> { Y_FORCE_INLINE TWrapperDualF(const TArgument& op1, const TArgument& op2) { TQType<TOp>::As(TBaseWrapper<TArgument>::Value) = (TOp) func(TQType<TOp>::As(op1), TQType<TOp>::As(op2)); - } -}; - -using _mm_or_si128 = TWrapperDual<uint64x2_t, decltype(vorrq_u64), vorrq_u64>; -using _mm_and_si128 = TWrapperDual<uint64x2_t, decltype(vandq_u64), vandq_u64>; -using _mm_andnot_si128 = - TWrapperDualSwap<uint64x2_t, decltype(vbicq_u64), vbicq_u64>; + } +}; + +using _mm_or_si128 = TWrapperDual<uint64x2_t, decltype(vorrq_u64), vorrq_u64>; +using _mm_and_si128 = TWrapperDual<uint64x2_t, decltype(vandq_u64), vandq_u64>; +using _mm_andnot_si128 = + TWrapperDualSwap<uint64x2_t, decltype(vbicq_u64), vbicq_u64>; using _mm_xor_si128 = TWrapperDual<uint64x2_t, decltype(veorq_u64), veorq_u64>; - + using _mm_add_epi8 = TWrapperDual<uint8x16_t, decltype(vaddq_u8), vaddq_u8>; -using _mm_add_epi16 = TWrapperDual<uint16x8_t, decltype(vaddq_u16), vaddq_u16>; -using _mm_add_epi32 = TWrapperDual<uint32x4_t, decltype(vaddq_u32), vaddq_u32>; -using _mm_add_epi64 = TWrapperDual<uint64x2_t, decltype(vaddq_u64), vaddq_u64>; - 
+using _mm_add_epi16 = TWrapperDual<uint16x8_t, decltype(vaddq_u16), vaddq_u16>; +using _mm_add_epi32 = TWrapperDual<uint32x4_t, decltype(vaddq_u32), vaddq_u32>; +using _mm_add_epi64 = TWrapperDual<uint64x2_t, decltype(vaddq_u64), vaddq_u64>; + inline __m128i _mm_madd_epi16(__m128i a, __m128i b) { int32x4_t aLow; int32x4_t aHigh; @@ -343,118 +343,118 @@ inline __m128i _mm_madd_epi16(__m128i a, __m128i b) { } using _mm_sub_epi8 = TWrapperDual<uint8x16_t, decltype(vsubq_u8), vsubq_u8>; -using _mm_sub_epi16 = TWrapperDual<uint16x8_t, decltype(vsubq_u16), vsubq_u16>; -using _mm_sub_epi32 = TWrapperDual<uint32x4_t, decltype(vsubq_u32), vsubq_u32>; -using _mm_sub_epi64 = TWrapperDual<uint64x2_t, decltype(vsubq_u64), vsubq_u64>; - -using _mm_unpacklo_epi8 = - TWrapperDual<uint8x16_t, decltype(vzip1q_u8), vzip1q_u8>; -using _mm_unpackhi_epi8 = - TWrapperDual<uint8x16_t, decltype(vzip2q_u8), vzip2q_u8>; -using _mm_unpacklo_epi16 = - TWrapperDual<uint16x8_t, decltype(vzip1q_u16), vzip1q_u16>; -using _mm_unpackhi_epi16 = - TWrapperDual<uint16x8_t, decltype(vzip2q_u16), vzip2q_u16>; -using _mm_unpacklo_epi32 = - TWrapperDual<uint32x4_t, decltype(vzip1q_u32), vzip1q_u32>; -using _mm_unpackhi_epi32 = - TWrapperDual<uint32x4_t, decltype(vzip2q_u32), vzip2q_u32>; -using _mm_unpacklo_epi64 = - TWrapperDual<uint64x2_t, decltype(vzip1q_u64), vzip1q_u64>; -using _mm_unpackhi_epi64 = - TWrapperDual<uint64x2_t, decltype(vzip2q_u64), vzip2q_u64>; - -using _mm_cmpeq_epi8 = - TWrapperDual<uint8x16_t, decltype(vceqq_u8), vceqq_u8>; -using _mm_cmpeq_epi16 = - TWrapperDual<uint16x8_t, decltype(vceqq_u16), vceqq_u16>; -using _mm_cmpeq_epi32 = - TWrapperDual<uint32x4_t, decltype(vceqq_u32), vceqq_u32>; - -using _mm_cmpgt_epi8 = - TWrapperDual<int8x16_t, decltype(vcgtq_s8), vcgtq_s8>; -using _mm_cmpgt_epi16 = - TWrapperDual<int16x8_t, decltype(vcgtq_s16), vcgtq_s16>; -using _mm_cmpgt_epi32 = - TWrapperDual<int32x4_t, decltype(vcgtq_s32), vcgtq_s32>; - -using _mm_cmplt_epi8 = - 
TWrapperDual<int8x16_t, decltype(vcltq_s8), vcltq_s8>; -using _mm_cmplt_epi16 = - TWrapperDual<int16x8_t, decltype(vcltq_s16), vcltq_s16>; -using _mm_cmplt_epi32 = - TWrapperDual<int32x4_t, decltype(vcltq_s32), vcltq_s32>; - +using _mm_sub_epi16 = TWrapperDual<uint16x8_t, decltype(vsubq_u16), vsubq_u16>; +using _mm_sub_epi32 = TWrapperDual<uint32x4_t, decltype(vsubq_u32), vsubq_u32>; +using _mm_sub_epi64 = TWrapperDual<uint64x2_t, decltype(vsubq_u64), vsubq_u64>; + +using _mm_unpacklo_epi8 = + TWrapperDual<uint8x16_t, decltype(vzip1q_u8), vzip1q_u8>; +using _mm_unpackhi_epi8 = + TWrapperDual<uint8x16_t, decltype(vzip2q_u8), vzip2q_u8>; +using _mm_unpacklo_epi16 = + TWrapperDual<uint16x8_t, decltype(vzip1q_u16), vzip1q_u16>; +using _mm_unpackhi_epi16 = + TWrapperDual<uint16x8_t, decltype(vzip2q_u16), vzip2q_u16>; +using _mm_unpacklo_epi32 = + TWrapperDual<uint32x4_t, decltype(vzip1q_u32), vzip1q_u32>; +using _mm_unpackhi_epi32 = + TWrapperDual<uint32x4_t, decltype(vzip2q_u32), vzip2q_u32>; +using _mm_unpacklo_epi64 = + TWrapperDual<uint64x2_t, decltype(vzip1q_u64), vzip1q_u64>; +using _mm_unpackhi_epi64 = + TWrapperDual<uint64x2_t, decltype(vzip2q_u64), vzip2q_u64>; + +using _mm_cmpeq_epi8 = + TWrapperDual<uint8x16_t, decltype(vceqq_u8), vceqq_u8>; +using _mm_cmpeq_epi16 = + TWrapperDual<uint16x8_t, decltype(vceqq_u16), vceqq_u16>; +using _mm_cmpeq_epi32 = + TWrapperDual<uint32x4_t, decltype(vceqq_u32), vceqq_u32>; + +using _mm_cmpgt_epi8 = + TWrapperDual<int8x16_t, decltype(vcgtq_s8), vcgtq_s8>; +using _mm_cmpgt_epi16 = + TWrapperDual<int16x8_t, decltype(vcgtq_s16), vcgtq_s16>; +using _mm_cmpgt_epi32 = + TWrapperDual<int32x4_t, decltype(vcgtq_s32), vcgtq_s32>; + +using _mm_cmplt_epi8 = + TWrapperDual<int8x16_t, decltype(vcltq_s8), vcltq_s8>; +using _mm_cmplt_epi16 = + TWrapperDual<int16x8_t, decltype(vcltq_s16), vcltq_s16>; +using _mm_cmplt_epi32 = + TWrapperDual<int32x4_t, decltype(vcltq_s32), vcltq_s32>; + Y_FORCE_INLINE __m128i _mm_load_si128(const __m128i* ptr) 
{ - __m128i result; + __m128i result; result.AsUi64x2 = vld1q_u64((const uint64_t*)ptr); - return result; -} - + return result; +} + Y_FORCE_INLINE __m128i _mm_loadu_si128(const __m128i* ptr) { - __m128i result; + __m128i result; result.AsUi64x2 = vld1q_u64((const uint64_t*)ptr); - return result; -} - + return result; +} + Y_FORCE_INLINE __m128i _mm_lddqu_si128(const __m128i* ptr) { return _mm_loadu_si128(ptr); } Y_FORCE_INLINE void _mm_storeu_si128(__m128i* ptr, const __m128i& op) { vst1q_u64((uint64_t*)ptr, op.AsUi64x2); -} - +} + Y_FORCE_INLINE void -_mm_store_si128(__m128i* ptr, const __m128i& op) { +_mm_store_si128(__m128i* ptr, const __m128i& op) { vst1q_u64((uint64_t*)ptr, op.AsUi64x2); -} - -template <typename TOp, typename TFunc, TFunc* func, typename... TParams> -struct TWrapperSimple : TBaseWrapper<__m128i> { +} + +template <typename TOp, typename TFunc, TFunc* func, typename... TParams> +struct TWrapperSimple : TBaseWrapper<__m128i> { Y_FORCE_INLINE - TWrapperSimple(TParams... params) { - TQType<TOp>::As(Value) = func(params...); - } -}; - -template <typename TOp, typename TFunc, TFunc* func, typename... TParams> -struct TWrapperSimpleF : TBaseWrapper<__m128> { + TWrapperSimple(TParams... params) { + TQType<TOp>::As(Value) = func(params...); + } +}; + +template <typename TOp, typename TFunc, TFunc* func, typename... TParams> +struct TWrapperSimpleF : TBaseWrapper<__m128> { Y_FORCE_INLINE - TWrapperSimpleF(TParams... params) { - TQType<TOp>::As(Value) = func(params...); - } -}; - -using _mm_set1_epi8 = - TWrapperSimple<int8x16_t, decltype(vdupq_n_s8), vdupq_n_s8, const char>; -using _mm_set1_epi16 = - TWrapperSimple<int16x8_t, decltype(vdupq_n_s16), vdupq_n_s16, const ui16>; -using _mm_set1_epi32 = - TWrapperSimple<int32x4_t, decltype(vdupq_n_s32), vdupq_n_s32, const ui32>; - -struct _mm_setzero_si128 : TBaseWrapper<__m128i> { + TWrapperSimpleF(TParams... 
params) { + TQType<TOp>::As(Value) = func(params...); + } +}; + +using _mm_set1_epi8 = + TWrapperSimple<int8x16_t, decltype(vdupq_n_s8), vdupq_n_s8, const char>; +using _mm_set1_epi16 = + TWrapperSimple<int16x8_t, decltype(vdupq_n_s16), vdupq_n_s16, const ui16>; +using _mm_set1_epi32 = + TWrapperSimple<int32x4_t, decltype(vdupq_n_s32), vdupq_n_s32, const ui32>; + +struct _mm_setzero_si128 : TBaseWrapper<__m128i> { Y_FORCE_INLINE - _mm_setzero_si128() { - TQType<uint64x2_t>::As(Value) = vdupq_n_u64(0); - } -}; - -struct _mm_loadl_epi64 : TBaseWrapper<__m128i> { + _mm_setzero_si128() { + TQType<uint64x2_t>::As(Value) = vdupq_n_u64(0); + } +}; + +struct _mm_loadl_epi64 : TBaseWrapper<__m128i> { Y_FORCE_INLINE - _mm_loadl_epi64(const __m128i* p) { + _mm_loadl_epi64(const __m128i* p) { uint64x1_t im = vld1_u64((const uint64_t*)p); - TQType<uint64x2_t>::As(Value) = vcombine_u64(im, vdup_n_u64(0)); - } -}; - -struct _mm_storel_epi64 : TBaseWrapper<__m128i> { + TQType<uint64x2_t>::As(Value) = vcombine_u64(im, vdup_n_u64(0)); + } +}; + +struct _mm_storel_epi64 : TBaseWrapper<__m128i> { Y_FORCE_INLINE - _mm_storel_epi64(__m128i* a, __m128i op) { + _mm_storel_epi64(__m128i* a, __m128i op) { vst1_u64((uint64_t*)a, vget_low_u64(op.AsUi64x2)); - } -}; - + } +}; + struct ShuffleStruct4 { ui8 x[4]; }; @@ -470,45 +470,45 @@ _MM_SHUFFLE(ui8 x4, ui8 x3, ui8 x2, ui8 x1) { } Y_FORCE_INLINE __m128i -_mm_shuffle_epi32(const __m128i& op1, const ShuffleStruct4& op2) { - __m128i result; +_mm_shuffle_epi32(const __m128i& op1, const ShuffleStruct4& op2) { + __m128i result; const ui8 xi[4] = { ui8(op2.x[0] * 4), ui8(op2.x[1] * 4), ui8(op2.x[2] * 4), ui8(op2.x[3] * 4) }; const uint8x16_t transform = { - ui8(xi[0]), ui8(xi[0] + 1), ui8(xi[0] + 2), ui8(xi[0] + 3), - ui8(xi[1]), ui8(xi[1] + 1), ui8(xi[1] + 2), ui8(xi[1] + 3), - ui8(xi[2]), ui8(xi[2] + 1), ui8(xi[2] + 2), ui8(xi[2] + 3), + ui8(xi[0]), ui8(xi[0] + 1), ui8(xi[0] + 2), ui8(xi[0] + 3), + ui8(xi[1]), ui8(xi[1] + 1), ui8(xi[1] + 2), 
ui8(xi[1] + 3), + ui8(xi[2]), ui8(xi[2] + 1), ui8(xi[2] + 2), ui8(xi[2] + 3), ui8(xi[3]), ui8(xi[3] + 1), ui8(xi[3] + 2), ui8(xi[3] + 3) }; - result.AsUi8x16 = vqtbl1q_u8(op1.AsUi8x16, transform); - return result; -} - + result.AsUi8x16 = vqtbl1q_u8(op1.AsUi8x16, transform); + return result; +} + Y_FORCE_INLINE int -_mm_movemask_epi8(const __m128i& op) { - uint8x16_t mask = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}; - uint8x16_t opmasked = vandq_u8(op.AsUi8x16, mask); - int8x16_t byteshifter = { - 0, -7, 0, -7, 0, -7, 0, -7, 0, -7, 0, -7, 0, -7, 0, -7}; - uint8x16_t opshifted = vshlq_u8(opmasked, byteshifter); - int16x8_t wordshifter = {-7, -5, -3, -1, 1, 3, 5, 7}; - uint16x8_t wordshifted = - vshlq_u16(vreinterpretq_u16_u8(opshifted), wordshifter); - return vaddvq_u16(wordshifted); -} - -template <int imm> -struct THelper_mm_srli_si128 : TBaseWrapper<__m128i> { +_mm_movemask_epi8(const __m128i& op) { + uint8x16_t mask = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}; + uint8x16_t opmasked = vandq_u8(op.AsUi8x16, mask); + int8x16_t byteshifter = { + 0, -7, 0, -7, 0, -7, 0, -7, 0, -7, 0, -7, 0, -7, 0, -7}; + uint8x16_t opshifted = vshlq_u8(opmasked, byteshifter); + int16x8_t wordshifter = {-7, -5, -3, -1, 1, 3, 5, 7}; + uint16x8_t wordshifted = + vshlq_u16(vreinterpretq_u16_u8(opshifted), wordshifter); + return vaddvq_u16(wordshifted); +} + +template <int imm> +struct THelper_mm_srli_si128 : TBaseWrapper<__m128i> { Y_FORCE_INLINE THelper_mm_srli_si128(const __m128i a) { const auto zero = vdupq_n_u8(0); - TQType<uint8x16_t>::As(Value) = vextq_u8(a.AsUi8x16, zero, imm); - } -}; - + TQType<uint8x16_t>::As(Value) = vextq_u8(a.AsUi8x16, zero, imm); + } +}; + template <> struct THelper_mm_srli_si128<16> : TBaseWrapper<__m128i> { Y_FORCE_INLINE @@ -518,8 +518,8 @@ struct THelper_mm_srli_si128<16> : TBaseWrapper<__m128i> { } }; -#define _mm_srli_si128(a, imm) 
THelper_mm_srli_si128<imm>(a) - +#define _mm_srli_si128(a, imm) THelper_mm_srli_si128<imm>(a) + template<int imm> inline uint8x16_t vextq_u8_function(uint8x16_t a, uint8x16_t b) { return vextq_u8(a, b, imm); @@ -531,33 +531,33 @@ inline uint8x16_t vextq_u8_function<16>(uint8x16_t /* a */, uint8x16_t b) { } -template <int imm> -struct THelper_mm_slli_si128 : TBaseWrapper<__m128i> { +template <int imm> +struct THelper_mm_slli_si128 : TBaseWrapper<__m128i> { Y_FORCE_INLINE THelper_mm_slli_si128(const __m128i a) { - auto zero = vdupq_n_u8(0); + auto zero = vdupq_n_u8(0); TQType<uint8x16_t>::As(Value) = vextq_u8_function<16 - imm>(zero, a.AsUi8x16); - } -}; - -#define _mm_slli_si128(a, imm) THelper_mm_slli_si128<imm>(a) - + } +}; + +#define _mm_slli_si128(a, imm) THelper_mm_slli_si128<imm>(a) + Y_FORCE_INLINE int _mm_cvtsi128_si32(const __m128i& op) { - return vgetq_lane_s32(op.AsSi32x4, 0); -} - -struct _mm_set_epi16 : TBaseWrapper<__m128i> { + return vgetq_lane_s32(op.AsSi32x4, 0); +} + +struct _mm_set_epi16 : TBaseWrapper<__m128i> { Y_FORCE_INLINE - _mm_set_epi16(const short w7, const short w6, - const short w5, const short w4, - const short w3, const short w2, - const short w1, const short w0) { - int16x4_t d0 = {w0, w1, w2, w3}; - int16x4_t d1 = {w4, w5, w6, w7}; - TQType<int16x8_t>::As(Value) = vcombine_s16(d0, d1); - } -}; - + _mm_set_epi16(const short w7, const short w6, + const short w5, const short w4, + const short w3, const short w2, + const short w1, const short w0) { + int16x4_t d0 = {w0, w1, w2, w3}; + int16x4_t d1 = {w4, w5, w6, w7}; + TQType<int16x8_t>::As(Value) = vcombine_s16(d0, d1); + } +}; + struct _mm_setr_epi16 : TBaseWrapper<__m128i> { Y_FORCE_INLINE _mm_setr_epi16(const short w7, const short w6, @@ -570,16 +570,16 @@ struct _mm_setr_epi16 : TBaseWrapper<__m128i> { } }; -struct _mm_set_epi32 : TBaseWrapper<__m128i> { +struct _mm_set_epi32 : TBaseWrapper<__m128i> { Y_FORCE_INLINE - _mm_set_epi32(const int x3, const int x2, - const int x1, const 
int x0) { - int32x2_t d0 = {x0, x1}; - int32x2_t d1 = {x2, x3}; - TQType<int32x4_t>::As(Value) = vcombine_s32(d0, d1); - } -}; - + _mm_set_epi32(const int x3, const int x2, + const int x1, const int x0) { + int32x2_t d0 = {x0, x1}; + int32x2_t d1 = {x2, x3}; + TQType<int32x4_t>::As(Value) = vcombine_s32(d0, d1); + } +}; + struct _mm_setr_epi32 : TBaseWrapper<__m128i> { Y_FORCE_INLINE _mm_setr_epi32(const int x3, const int x2, @@ -590,14 +590,14 @@ struct _mm_setr_epi32 : TBaseWrapper<__m128i> { } }; -struct _mm_cvtsi32_si128 : TBaseWrapper<__m128i> { +struct _mm_cvtsi32_si128 : TBaseWrapper<__m128i> { Y_FORCE_INLINE - _mm_cvtsi32_si128(int op) { - auto zero = vdupq_n_s32(0); - TQType<int32x4_t>::As(Value) = vsetq_lane_s32(op, zero, 0); - } -}; - + _mm_cvtsi32_si128(int op) { + auto zero = vdupq_n_s32(0); + TQType<int32x4_t>::As(Value) = vsetq_lane_s32(op, zero, 0); + } +}; + struct _mm_cvtsi64_si128 : TBaseWrapper<__m128i> { Y_FORCE_INLINE _mm_cvtsi64_si128(i64 op) { @@ -606,41 +606,41 @@ struct _mm_cvtsi64_si128 : TBaseWrapper<__m128i> { } }; -template <typename TOpOut, typename TOpIn, - typename TFunc, TFunc* func, - typename TCombine, TCombine* combine> -struct TCombineWrapper : TBaseWrapper<__m128i> { +template <typename TOpOut, typename TOpIn, + typename TFunc, TFunc* func, + typename TCombine, TCombine* combine> +struct TCombineWrapper : TBaseWrapper<__m128i> { Y_FORCE_INLINE - TCombineWrapper(const __m128i op1, const __m128i op2) { - TQType<TOpOut>::As(Value) = - combine(func(TQType<TOpIn>::As(op1)), - func(TQType<TOpIn>::As(op2))); - } -}; - -using _mm_packs_epi16 = - TCombineWrapper<int8x16_t, int16x8_t, - decltype(vqmovn_s16), vqmovn_s16, - decltype(vcombine_s8), vcombine_s8>; -using _mm_packs_epi32 = - TCombineWrapper<int16x8_t, int32x4_t, - decltype(vqmovn_s32), vqmovn_s32, - decltype(vcombine_s16), vcombine_s16>; -using _mm_packus_epi16 = - TCombineWrapper<uint8x16_t, int16x8_t, - decltype(vqmovun_s16), vqmovun_s16, - decltype(vcombine_u8), 
vcombine_u8>; - -template <typename TOpOut, typename TOpIn, - typename TFunc, TFunc* func, typename... TParams> -struct TScalarOutWrapper : TBaseWrapper<TOpOut> { + TCombineWrapper(const __m128i op1, const __m128i op2) { + TQType<TOpOut>::As(Value) = + combine(func(TQType<TOpIn>::As(op1)), + func(TQType<TOpIn>::As(op2))); + } +}; + +using _mm_packs_epi16 = + TCombineWrapper<int8x16_t, int16x8_t, + decltype(vqmovn_s16), vqmovn_s16, + decltype(vcombine_s8), vcombine_s8>; +using _mm_packs_epi32 = + TCombineWrapper<int16x8_t, int32x4_t, + decltype(vqmovn_s32), vqmovn_s32, + decltype(vcombine_s16), vcombine_s16>; +using _mm_packus_epi16 = + TCombineWrapper<uint8x16_t, int16x8_t, + decltype(vqmovun_s16), vqmovun_s16, + decltype(vcombine_u8), vcombine_u8>; + +template <typename TOpOut, typename TOpIn, + typename TFunc, TFunc* func, typename... TParams> +struct TScalarOutWrapper : TBaseWrapper<TOpOut> { Y_FORCE_INLINE - TScalarOutWrapper(const __m128i op, TParams... params) { - TBaseWrapper<TOpOut>::Value = - func(TQType<TOpIn>::As(op), params...); - } -}; - + TScalarOutWrapper(const __m128i op, TParams... 
params) { + TBaseWrapper<TOpOut>::Value = + func(TQType<TOpIn>::As(op), params...); + } +}; + template<int imm> int extract_epi8_arm(__m128i arg) { return vgetq_lane_u8(arg.AsUi8x16, imm); @@ -649,13 +649,13 @@ int extract_epi8_arm(__m128i arg) { template<int imm> int extract_epi16_arm(__m128i arg) { return vgetq_lane_u16(arg.AsUi16x8, imm); -} - +} + template<int imm> int extract_epi32_arm(__m128i arg) { return vgetq_lane_s32(arg.AsSi32x4, imm); } - + template<int imm> long long extract_epi64_arm(__m128i arg) { return vgetq_lane_s64(arg.AsSi64x2, imm); @@ -669,49 +669,49 @@ long long extract_epi64_arm(__m128i arg) { static Y_FORCE_INLINE __m128i _mm_mul_epu32(__m128i op1, __m128i op2) { - __m128i result; - uint32x4_t r1 = vuzp1q_u32(op1.AsUi32x4, op2.AsUi32x4); - uint32x4_t r2 = vuzp1q_u32(op2.AsUi32x4, op1.AsUi32x4); - result.AsUi64x2 = vmull_u32(vget_low_u32(r1), vget_low_u32(r2)); - return result; -} - -template <> -struct TQType<float32x4_t> { - static inline float32x4_t& As(__m128& value) { - return value.AsFloat32x4; - } - - static inline const float32x4_t& As(const __m128& value) { - return value.AsFloat32x4; - } - - static inline float32x4_t& As(__m128i& value) { - return value.AsFloat32x4; - } - - static inline const float32x4_t& As(const __m128i& value) { - return value.AsFloat32x4; - } -}; - -template <> -struct TQType<float64x2_t> { - static inline float64x2_t& As(__m128& value) { - return value.AsFloat64x2; - } - - static inline const float64x2_t& As(const __m128& value) { - return value.AsFloat64x2; - } - - static inline float64x2_t& As(__m128i& value) { - return value.AsFloat64x2; - } - - static inline const float64x2_t& As(const __m128i& value) { - return value.AsFloat64x2; - } + __m128i result; + uint32x4_t r1 = vuzp1q_u32(op1.AsUi32x4, op2.AsUi32x4); + uint32x4_t r2 = vuzp1q_u32(op2.AsUi32x4, op1.AsUi32x4); + result.AsUi64x2 = vmull_u32(vget_low_u32(r1), vget_low_u32(r2)); + return result; +} + +template <> +struct TQType<float32x4_t> { + static 
inline float32x4_t& As(__m128& value) { + return value.AsFloat32x4; + } + + static inline const float32x4_t& As(const __m128& value) { + return value.AsFloat32x4; + } + + static inline float32x4_t& As(__m128i& value) { + return value.AsFloat32x4; + } + + static inline const float32x4_t& As(const __m128i& value) { + return value.AsFloat32x4; + } +}; + +template <> +struct TQType<float64x2_t> { + static inline float64x2_t& As(__m128& value) { + return value.AsFloat64x2; + } + + static inline const float64x2_t& As(const __m128& value) { + return value.AsFloat64x2; + } + + static inline float64x2_t& As(__m128i& value) { + return value.AsFloat64x2; + } + + static inline const float64x2_t& As(const __m128i& value) { + return value.AsFloat64x2; + } static inline float64x2_t& As(__m128d& value) { return value; @@ -720,30 +720,30 @@ struct TQType<float64x2_t> { static inline const float64x2_t& As(const __m128d& value) { return value; } -}; - -using _mm_set1_ps = TWrapperSimpleF<float32x4_t, - decltype(vdupq_n_f32), vdupq_n_f32, const float>; -using _mm_set_ps1 = TWrapperSimpleF<float32x4_t, - decltype(vdupq_n_f32), vdupq_n_f32, const float>; - -struct _mm_setzero_ps : TBaseWrapper<__m128> { +}; + +using _mm_set1_ps = TWrapperSimpleF<float32x4_t, + decltype(vdupq_n_f32), vdupq_n_f32, const float>; +using _mm_set_ps1 = TWrapperSimpleF<float32x4_t, + decltype(vdupq_n_f32), vdupq_n_f32, const float>; + +struct _mm_setzero_ps : TBaseWrapper<__m128> { Y_FORCE_INLINE - _mm_setzero_ps() { - TQType<float32x4_t>::As(Value) = vdupq_n_f32(0.); - } -}; - + _mm_setzero_ps() { + TQType<float32x4_t>::As(Value) = vdupq_n_f32(0.); + } +}; + Y_FORCE_INLINE __m128d _mm_setzero_pd() { return vdupq_n_f64(0.); } Y_FORCE_INLINE __m128 _mm_loadu_ps(const float* ptr) { - __m128 result; - result.AsFloat32x4 = vld1q_f32(ptr); - return result; -} - + __m128 result; + result.AsFloat32x4 = vld1q_f32(ptr); + return result; +} + Y_FORCE_INLINE __m128 _mm_load_ps(const float* ptr) { __m128 result; 
result.AsFloat32x4 = vld1q_f32(ptr); @@ -751,23 +751,23 @@ Y_FORCE_INLINE __m128 _mm_load_ps(const float* ptr) { } Y_FORCE_INLINE void _mm_storeu_ps(float* ptr, const __m128& op) { - vst1q_f32(ptr, op.AsFloat32x4); -} - + vst1q_f32(ptr, op.AsFloat32x4); +} + Y_FORCE_INLINE void _mm_store_ps(float* ptr, const __m128& op) { vst1q_f32(ptr, op.AsFloat32x4); } -struct _mm_set_ps : TBaseWrapper<__m128> { +struct _mm_set_ps : TBaseWrapper<__m128> { Y_FORCE_INLINE - _mm_set_ps(const float x3, const float x2, - const float x1, const float x0) { - float32x2_t d0 = {x0, x1}; - float32x2_t d1 = {x2, x3}; - TQType<float32x4_t>::As(Value) = vcombine_f32(d0, d1); - } -}; - + _mm_set_ps(const float x3, const float x2, + const float x1, const float x0) { + float32x2_t d0 = {x0, x1}; + float32x2_t d1 = {x2, x3}; + TQType<float32x4_t>::As(Value) = vcombine_f32(d0, d1); + } +}; + Y_FORCE_INLINE __m128d _mm_set_pd(double d1, double d0) { const float64x1_t p0 = {d0}; const float64x1_t p1 = {d1}; @@ -788,81 +788,81 @@ Y_FORCE_INLINE void _mm_store_pd(double* res, __m128d a) { vst1q_f64(res, a); } -using _mm_add_ps = TWrapperDualF<float32x4_t, decltype(vaddq_f32), vaddq_f32>; -using _mm_sub_ps = TWrapperDualF<float32x4_t, decltype(vsubq_f32), vsubq_f32>; -using _mm_mul_ps = TWrapperDualF<float32x4_t, decltype(vmulq_f32), vmulq_f32>; -using _mm_div_ps = TWrapperDualF<float32x4_t, decltype(vdivq_f32), vdivq_f32>; -using _mm_cmpeq_ps = TWrapperDualF<float32x4_t, decltype(vceqq_f32), vceqq_f32>; -using _mm_cmpgt_ps = TWrapperDualF<float32x4_t, decltype(vcgtq_f32), vcgtq_f32>; -using _mm_max_ps = TWrapperDualF<float32x4_t, decltype(vmaxq_f32), vmaxq_f32>; -using _mm_min_ps = TWrapperDualF<float32x4_t, decltype(vminq_f32), vminq_f32>; - +using _mm_add_ps = TWrapperDualF<float32x4_t, decltype(vaddq_f32), vaddq_f32>; +using _mm_sub_ps = TWrapperDualF<float32x4_t, decltype(vsubq_f32), vsubq_f32>; +using _mm_mul_ps = TWrapperDualF<float32x4_t, decltype(vmulq_f32), vmulq_f32>; +using _mm_div_ps = 
TWrapperDualF<float32x4_t, decltype(vdivq_f32), vdivq_f32>; +using _mm_cmpeq_ps = TWrapperDualF<float32x4_t, decltype(vceqq_f32), vceqq_f32>; +using _mm_cmpgt_ps = TWrapperDualF<float32x4_t, decltype(vcgtq_f32), vcgtq_f32>; +using _mm_max_ps = TWrapperDualF<float32x4_t, decltype(vmaxq_f32), vmaxq_f32>; +using _mm_min_ps = TWrapperDualF<float32x4_t, decltype(vminq_f32), vminq_f32>; + using _mm_add_pd = TWrapperDualF<float64x2_t, decltype(vaddq_f64), vaddq_f64, __m128d>; using _mm_sub_pd = TWrapperDualF<float64x2_t, decltype(vsubq_f64), vsubq_f64, __m128d>; using _mm_mul_pd = TWrapperDualF<float64x2_t, decltype(vmulq_f64), vmulq_f64, __m128d>; using _mm_div_pd = TWrapperDualF<float64x2_t, decltype(vdivq_f64), vdivq_f64, __m128d>; -struct _mm_and_ps : TBaseWrapper<__m128> { +struct _mm_and_ps : TBaseWrapper<__m128> { Y_FORCE_INLINE - _mm_and_ps(const __m128& op1, const __m128& op2) { - TQType<uint64x2_t>::As(Value) = - vandq_u64(TQType<uint64x2_t>::As(op1), - TQType<uint64x2_t>::As(op2)); - } -}; - + _mm_and_ps(const __m128& op1, const __m128& op2) { + TQType<uint64x2_t>::As(Value) = + vandq_u64(TQType<uint64x2_t>::As(op1), + TQType<uint64x2_t>::As(op2)); + } +}; + Y_FORCE_INLINE __m128d _mm_and_pd(__m128d a, __m128d b) { return vandq_u64(a, b); } Y_FORCE_INLINE void _MM_TRANSPOSE4_PS(__m128& op0, __m128& op1, __m128& op2, __m128& op3) { - float64x2_t im0 = - (float64x2_t)vtrn1q_f32(op0.AsFloat32x4, op1.AsFloat32x4); - float64x2_t im1 = - (float64x2_t)vtrn2q_f32(op0.AsFloat32x4, op1.AsFloat32x4); - float64x2_t im2 = - (float64x2_t)vtrn1q_f32(op2.AsFloat32x4, op3.AsFloat32x4); - float64x2_t im3 = - (float64x2_t)vtrn2q_f32(op2.AsFloat32x4, op3.AsFloat32x4); - - TQType<float64x2_t>::As(op0) = vtrn1q_f64(im0, im2); - TQType<float64x2_t>::As(op1) = vtrn1q_f64(im1, im3); - TQType<float64x2_t>::As(op2) = vtrn2q_f64(im0, im2); - TQType<float64x2_t>::As(op3) = vtrn2q_f64(im1, im3); -}; - + float64x2_t im0 = + (float64x2_t)vtrn1q_f32(op0.AsFloat32x4, op1.AsFloat32x4); + 
float64x2_t im1 = + (float64x2_t)vtrn2q_f32(op0.AsFloat32x4, op1.AsFloat32x4); + float64x2_t im2 = + (float64x2_t)vtrn1q_f32(op2.AsFloat32x4, op3.AsFloat32x4); + float64x2_t im3 = + (float64x2_t)vtrn2q_f32(op2.AsFloat32x4, op3.AsFloat32x4); + + TQType<float64x2_t>::As(op0) = vtrn1q_f64(im0, im2); + TQType<float64x2_t>::As(op1) = vtrn1q_f64(im1, im3); + TQType<float64x2_t>::As(op2) = vtrn2q_f64(im0, im2); + TQType<float64x2_t>::As(op3) = vtrn2q_f64(im1, im3); +}; + Y_FORCE_INLINE __m128 _mm_castsi128_ps(__m128i op) { - return reinterpret_cast<__m128&>(op); -} - + return reinterpret_cast<__m128&>(op); +} + Y_FORCE_INLINE __m128i _mm_castps_si128(__m128 op) { - return reinterpret_cast<__m128i&>(op); -} - -template <typename TOpOut, typename TOpIn, - typename TFunc, TFunc* func, typename... TParams> -struct TCvtS2FWrapperSingle : TBaseWrapper<__m128> { + return reinterpret_cast<__m128i&>(op); +} + +template <typename TOpOut, typename TOpIn, + typename TFunc, TFunc* func, typename... TParams> +struct TCvtS2FWrapperSingle : TBaseWrapper<__m128> { Y_FORCE_INLINE - TCvtS2FWrapperSingle(const __m128i& op, TParams... params) { - TQType<TOpOut>::As(Value) = - func(TQType<TOpIn>::As(op), params...); - } -}; - -using _mm_cvtepi32_ps = - TCvtS2FWrapperSingle<float32x4_t, int32x4_t, - decltype(vcvtq_f32_s32), vcvtq_f32_s32>; - -template <typename TOpOut, typename TOpIn, - typename TFunc, TFunc* func, typename... TParams> -struct TCvtF2SWrapperSingle : TBaseWrapper<__m128i> { + TCvtS2FWrapperSingle(const __m128i& op, TParams... params) { + TQType<TOpOut>::As(Value) = + func(TQType<TOpIn>::As(op), params...); + } +}; + +using _mm_cvtepi32_ps = + TCvtS2FWrapperSingle<float32x4_t, int32x4_t, + decltype(vcvtq_f32_s32), vcvtq_f32_s32>; + +template <typename TOpOut, typename TOpIn, + typename TFunc, TFunc* func, typename... TParams> +struct TCvtF2SWrapperSingle : TBaseWrapper<__m128i> { Y_FORCE_INLINE - TCvtF2SWrapperSingle(const __m128& op, TParams... 
params) { - TQType<TOpOut>::As(Value) = - func(TQType<TOpIn>::As(op), params...); - } -}; - + TCvtF2SWrapperSingle(const __m128& op, TParams... params) { + TQType<TOpOut>::As(Value) = + func(TQType<TOpIn>::As(op), params...); + } +}; + inline __m128i _mm_cvtps_epi32(__m128 a) { /// vcvtq_s32_f32 rounds to zero, but we need to round to the nearest. static const float32x4_t half = vdupq_n_f32(0.5f); @@ -874,26 +874,26 @@ inline __m128i _mm_cvtps_epi32(__m128 a) { return res; } -using _mm_cvttps_epi32 = - TCvtF2SWrapperSingle<int32x4_t, float32x4_t, - decltype(vcvtq_s32_f32), vcvtq_s32_f32>; - +using _mm_cvttps_epi32 = + TCvtF2SWrapperSingle<int32x4_t, float32x4_t, + decltype(vcvtq_s32_f32), vcvtq_s32_f32>; + Y_FORCE_INLINE int -_mm_movemask_ps(const __m128& op) { - uint32x4_t mask = {0x80000000, 0x80000000, 0x80000000, 0x80000000}; - uint32x4_t bits = vandq_u32(op.AsUi32x4, mask); - int32x4_t shifts = {-31, -30, -29, -28}; - bits = vshlq_u32(bits, shifts); - return vaddvq_u32(bits); -} +_mm_movemask_ps(const __m128& op) { + uint32x4_t mask = {0x80000000, 0x80000000, 0x80000000, 0x80000000}; + uint32x4_t bits = vandq_u32(op.AsUi32x4, mask); + int32x4_t shifts = {-31, -30, -29, -28}; + bits = vshlq_u32(bits, shifts); + return vaddvq_u32(bits); +} Y_FORCE_INLINE i64 _mm_cvtsi128_si64(__m128i a) { return vgetq_lane_s64(a.AsSi64x2, 0); } - -static inline void _mm_pause() { + +static inline void _mm_pause() { __asm__ ("YIELD"); -} +} static inline __m128 _mm_rsqrt_ps(__m128 a) { __m128 res; diff --git a/library/cpp/sse/ut/test.cpp b/library/cpp/sse/ut/test.cpp index 33c999d284..42a82a8cfa 100644 --- a/library/cpp/sse/ut/test.cpp +++ b/library/cpp/sse/ut/test.cpp @@ -1,10 +1,10 @@ -/* - Unittests for all SSE instrinsics translated to NEON instrinsics or - software implementation. - Should be tested both on Intel and ARM64. 
- */ -/* Author: Vitaliy Manushkin <agri@yandex-team.ru */ - +/* + Unittests for all SSE instrinsics translated to NEON instrinsics or + software implementation. + Should be tested both on Intel and ARM64. + */ +/* Author: Vitaliy Manushkin <agri@yandex-team.ru */ + #include <library/cpp/testing/unittest/registar.h> #include <util/generic/typetraits.h> @@ -13,35 +13,35 @@ #include <util/stream/output.h> #include <algorithm> -#include <array> -#include <limits> +#include <array> +#include <limits> #include <memory> #include <type_traits> #include <utility> - -template <typename TResult, typename TFunc, TFunc* func> -struct T_mm_CallWrapper { - TResult Value; - - template <typename... TParams> - T_mm_CallWrapper(TParams&&... params) { - Value = func(std::forward<TParams>(params)...); - } - - operator TResult&() { - return Value; - } - - operator const TResult&() const { - return Value; - } -}; - -#if defined(_arm64_) + +template <typename TResult, typename TFunc, TFunc* func> +struct T_mm_CallWrapper { + TResult Value; + + template <typename... TParams> + T_mm_CallWrapper(TParams&&... 
params) { + Value = func(std::forward<TParams>(params)...); + } + + operator TResult&() { + return Value; + } + + operator const TResult&() const { + return Value; + } +}; + +#if defined(_arm64_) #include "library/cpp/sse/sse2neon.h" #elif defined(_i386_) || defined(_x86_64_) -#include <xmmintrin.h> -#include <emmintrin.h> +#include <xmmintrin.h> +#include <emmintrin.h> #include <smmintrin.h> #elif defined(_ppc64_) #include "library/cpp/sse/powerpc.h" @@ -54,10 +54,10 @@ struct T_mm_CallWrapper { #define WrapF(T_mm_func) T_mm_func #define WrapD(T_mm_func) T_mm_func #elif defined(_ppc64_) || defined(_i386_) || defined(_x86_64_) -#define Wrap(_mm_func) \ - T_mm_CallWrapper<__m128i, decltype(_mm_func), _mm_func> -#define WrapF(_mm_func) \ - T_mm_CallWrapper<__m128, decltype(_mm_func), _mm_func> +#define Wrap(_mm_func) \ + T_mm_CallWrapper<__m128i, decltype(_mm_func), _mm_func> +#define WrapF(_mm_func) \ + T_mm_CallWrapper<__m128, decltype(_mm_func), _mm_func> #define WrapD(_mm_func) \ T_mm_CallWrapper<__m128d, decltype(_mm_func), _mm_func> using int8x16_t = std::array<i8, 16>; @@ -70,69 +70,69 @@ using uint32x4_t = std::array<ui32, 4>; using uint64x2_t = std::array<ui64, 2>; using float32x4_t = std::array<float, 4>; using float64x2_t = std::array<double, 2>; - + template <typename TVectorType> -struct TQType { +struct TQType { static TVectorType As(__m128i param) { TVectorType value; - _mm_storeu_si128((__m128i*)&value, param); - return value; - } + _mm_storeu_si128((__m128i*)&value, param); + return value; + } static TVectorType As(__m128 param) { TVectorType value; - _mm_storeu_ps((float*)&value, param); - return value; - } + _mm_storeu_ps((float*)&value, param); + return value; + } static TVectorType As(__m128d param) { TVectorType value; _mm_storeu_pd((double*)&value, param); return value; } -}; -#endif - +}; +#endif + template <typename TVectorType> -struct TFuncLoad; +struct TFuncLoad; template <typename TVectorType> -struct TFuncStore; - -template <> -struct 
TFuncLoad<__m128i> { - __m128i Value; - - template <typename TPointer> - TFuncLoad(TPointer* ptr) { - Value = _mm_loadu_si128((__m128i*)ptr); - } - - operator __m128i&() { - return Value; - } - - operator const __m128i&() const { - return Value; - } -}; - -template <> -struct TFuncLoad<__m128> { - __m128 Value; - - template <typename TPointer> - TFuncLoad(TPointer* ptr) { - Value = _mm_loadu_ps((float*)ptr); - } - - operator __m128&() { - return Value; - } - - operator const __m128&() const { - return Value; - } -}; - -template <> +struct TFuncStore; + +template <> +struct TFuncLoad<__m128i> { + __m128i Value; + + template <typename TPointer> + TFuncLoad(TPointer* ptr) { + Value = _mm_loadu_si128((__m128i*)ptr); + } + + operator __m128i&() { + return Value; + } + + operator const __m128i&() const { + return Value; + } +}; + +template <> +struct TFuncLoad<__m128> { + __m128 Value; + + template <typename TPointer> + TFuncLoad(TPointer* ptr) { + Value = _mm_loadu_ps((float*)ptr); + } + + operator __m128&() { + return Value; + } + + operator const __m128&() const { + return Value; + } +}; + +template <> struct TFuncLoad<__m128d> { __m128d Value; @@ -151,153 +151,153 @@ struct TFuncLoad<__m128d> { }; template <> -struct TFuncStore<__m128i> { - template <typename TPointer> - TFuncStore(TPointer* ptr, __m128i Value) { - _mm_storeu_si128((__m128i*)ptr, Value); - } -}; - -template <> -struct TFuncStore<__m128> { - template <typename TPointer> - TFuncStore(TPointer* ptr, __m128 Value) { - _mm_storeu_ps((float*)ptr, Value); - } -}; - -class TSSEEmulTest: public TTestBase { -private: - UNIT_TEST_SUITE(TSSEEmulTest); - UNIT_TEST(Test_mm_load_si128); - UNIT_TEST(Test_mm_loadu_si128); +struct TFuncStore<__m128i> { + template <typename TPointer> + TFuncStore(TPointer* ptr, __m128i Value) { + _mm_storeu_si128((__m128i*)ptr, Value); + } +}; + +template <> +struct TFuncStore<__m128> { + template <typename TPointer> + TFuncStore(TPointer* ptr, __m128 Value) { + 
_mm_storeu_ps((float*)ptr, Value); + } +}; + +class TSSEEmulTest: public TTestBase { +private: + UNIT_TEST_SUITE(TSSEEmulTest); + UNIT_TEST(Test_mm_load_si128); + UNIT_TEST(Test_mm_loadu_si128); UNIT_TEST(Test_mm_storeu_si128); UNIT_TEST(Test_mm_loadu_si128_2); UNIT_TEST(Test_mm_loadu_ps); UNIT_TEST(Test_mm_storeu_ps); - + UNIT_TEST(Test_mm_slli_epi16); UNIT_TEST(Test_mm_slli_epi32); UNIT_TEST(Test_mm_slli_epi64); UNIT_TEST(Test_mm_slli_si128); - UNIT_TEST(Test_mm_srli_epi16); - UNIT_TEST(Test_mm_srli_epi32); - UNIT_TEST(Test_mm_srli_epi64); + UNIT_TEST(Test_mm_srli_epi16); + UNIT_TEST(Test_mm_srli_epi32); + UNIT_TEST(Test_mm_srli_epi64); UNIT_TEST(Test_mm_srli_si128); - + UNIT_TEST(Test_mm_srai_epi16); UNIT_TEST(Test_mm_srai_epi32); UNIT_TEST(Test_mm_sll_epi16); UNIT_TEST(Test_mm_sll_epi32); UNIT_TEST(Test_mm_sll_epi64); - + UNIT_TEST(Test_mm_srl_epi16); UNIT_TEST(Test_mm_srl_epi32); UNIT_TEST(Test_mm_srl_epi64); - UNIT_TEST(Test_mm_add_epi16); - UNIT_TEST(Test_mm_add_epi32); - UNIT_TEST(Test_mm_add_epi64); - UNIT_TEST(Test_mm_add_ps); + UNIT_TEST(Test_mm_add_epi16); + UNIT_TEST(Test_mm_add_epi32); + UNIT_TEST(Test_mm_add_epi64); + UNIT_TEST(Test_mm_add_ps); UNIT_TEST(Test_mm_add_pd); - + UNIT_TEST(Test_mm_madd_epi16); - UNIT_TEST(Test_mm_sub_epi16); - UNIT_TEST(Test_mm_sub_epi32); - UNIT_TEST(Test_mm_sub_epi64); - UNIT_TEST(Test_mm_sub_ps); + UNIT_TEST(Test_mm_sub_epi16); + UNIT_TEST(Test_mm_sub_epi32); + UNIT_TEST(Test_mm_sub_epi64); + UNIT_TEST(Test_mm_sub_ps); UNIT_TEST(Test_mm_sub_pd); - - UNIT_TEST(Test_mm_mul_ps); + + UNIT_TEST(Test_mm_mul_ps); UNIT_TEST(Test_mm_mul_pd); - UNIT_TEST(Test_mm_div_ps); + UNIT_TEST(Test_mm_div_ps); UNIT_TEST(Test_mm_div_pd); - UNIT_TEST(Test_mm_max_ps); - UNIT_TEST(Test_mm_min_ps); - UNIT_TEST(Test_mm_and_ps); - - UNIT_TEST(Test_mm_unpacklo_epi8); - UNIT_TEST(Test_mm_unpackhi_epi8); - UNIT_TEST(Test_mm_unpacklo_epi16); - UNIT_TEST(Test_mm_unpackhi_epi16); - UNIT_TEST(Test_mm_unpacklo_epi32); - UNIT_TEST(Test_mm_unpackhi_epi32); 
- UNIT_TEST(Test_mm_unpacklo_epi64); - UNIT_TEST(Test_mm_unpackhi_epi64); - - UNIT_TEST(Test_mm_or_si128); - UNIT_TEST(Test_mm_and_si128); - UNIT_TEST(Test_mm_andnot_si128); - - UNIT_TEST(Test_mm_cmpeq_epi8); - UNIT_TEST(Test_mm_cmpeq_epi16); - UNIT_TEST(Test_mm_cmpeq_epi32); - UNIT_TEST(Test_mm_cmpeq_ps); - - UNIT_TEST(Test_mm_cmpgt_epi8); - UNIT_TEST(Test_mm_cmpgt_epi16); - UNIT_TEST(Test_mm_cmpgt_epi32); - UNIT_TEST(Test_mm_cmpgt_ps); - - UNIT_TEST(Test_mm_cmplt_epi8); - UNIT_TEST(Test_mm_cmplt_epi16); - UNIT_TEST(Test_mm_cmplt_epi32); - - UNIT_TEST(Test_mm_set1_epi8); - UNIT_TEST(Test_mm_set1_epi16); - UNIT_TEST(Test_mm_set1_epi32); - UNIT_TEST(Test_mm_set1_ps); + UNIT_TEST(Test_mm_max_ps); + UNIT_TEST(Test_mm_min_ps); + UNIT_TEST(Test_mm_and_ps); + + UNIT_TEST(Test_mm_unpacklo_epi8); + UNIT_TEST(Test_mm_unpackhi_epi8); + UNIT_TEST(Test_mm_unpacklo_epi16); + UNIT_TEST(Test_mm_unpackhi_epi16); + UNIT_TEST(Test_mm_unpacklo_epi32); + UNIT_TEST(Test_mm_unpackhi_epi32); + UNIT_TEST(Test_mm_unpacklo_epi64); + UNIT_TEST(Test_mm_unpackhi_epi64); + + UNIT_TEST(Test_mm_or_si128); + UNIT_TEST(Test_mm_and_si128); + UNIT_TEST(Test_mm_andnot_si128); + + UNIT_TEST(Test_mm_cmpeq_epi8); + UNIT_TEST(Test_mm_cmpeq_epi16); + UNIT_TEST(Test_mm_cmpeq_epi32); + UNIT_TEST(Test_mm_cmpeq_ps); + + UNIT_TEST(Test_mm_cmpgt_epi8); + UNIT_TEST(Test_mm_cmpgt_epi16); + UNIT_TEST(Test_mm_cmpgt_epi32); + UNIT_TEST(Test_mm_cmpgt_ps); + + UNIT_TEST(Test_mm_cmplt_epi8); + UNIT_TEST(Test_mm_cmplt_epi16); + UNIT_TEST(Test_mm_cmplt_epi32); + + UNIT_TEST(Test_mm_set1_epi8); + UNIT_TEST(Test_mm_set1_epi16); + UNIT_TEST(Test_mm_set1_epi32); + UNIT_TEST(Test_mm_set1_ps); UNIT_TEST(Test_mm_set_ps1); - - UNIT_TEST(Test_mm_setzero_si128); - UNIT_TEST(Test_mm_setzero_ps); + + UNIT_TEST(Test_mm_setzero_si128); + UNIT_TEST(Test_mm_setzero_ps); UNIT_TEST(Test_mm_setzero_pd); - - UNIT_TEST(Test_mm_storel_epi64); - UNIT_TEST(Test_mm_loadl_epi64); - + + UNIT_TEST(Test_mm_storel_epi64); + 
UNIT_TEST(Test_mm_loadl_epi64); + UNIT_TEST(Test_mm_loadl_pd); UNIT_TEST(Test_mm_loadh_pd); UNIT_TEST(Test_mm_cvtsd_f64); - UNIT_TEST(Test_mm_shuffle_epi32); - UNIT_TEST(Test_mm_movemask_epi8); - UNIT_TEST(Test_mm_cvtsi128_si32); + UNIT_TEST(Test_mm_shuffle_epi32); + UNIT_TEST(Test_mm_movemask_epi8); + UNIT_TEST(Test_mm_cvtsi128_si32); UNIT_TEST(Test_mm_cvtsi128_si64); - - UNIT_TEST(Test_mm_set_epi16); - UNIT_TEST(Test_mm_set_epi32); - UNIT_TEST(Test_mm_set_ps); + + UNIT_TEST(Test_mm_set_epi16); + UNIT_TEST(Test_mm_set_epi32); + UNIT_TEST(Test_mm_set_ps); UNIT_TEST(Test_mm_set_pd); - - UNIT_TEST(Test_mm_cvtsi32_si128); + + UNIT_TEST(Test_mm_cvtsi32_si128); UNIT_TEST(Test_mm_cvtsi64_si128); - - UNIT_TEST(Test_mm_packs_epi16); - UNIT_TEST(Test_mm_packs_epi32); - UNIT_TEST(Test_mm_packus_epi16); - - UNIT_TEST(Test_mm_extract_epi16); + + UNIT_TEST(Test_mm_packs_epi16); + UNIT_TEST(Test_mm_packs_epi32); + UNIT_TEST(Test_mm_packus_epi16); + + UNIT_TEST(Test_mm_extract_epi16); UNIT_TEST(Test_mm_extract_epi8); UNIT_TEST(Test_mm_extract_epi32); UNIT_TEST(Test_mm_extract_epi64); - - UNIT_TEST(Test_MM_TRANSPOSE4_PS); - UNIT_TEST(Test_mm_movemask_ps); + + UNIT_TEST(Test_MM_TRANSPOSE4_PS); + UNIT_TEST(Test_mm_movemask_ps); UNIT_TEST(Test_mm_movemask_ps_2); - - UNIT_TEST(Test_mm_cvtepi32_ps); - UNIT_TEST(Test_mm_cvtps_epi32); - UNIT_TEST(Test_mm_cvttps_epi32); - - UNIT_TEST(Test_mm_castsi128_ps); - UNIT_TEST(Test_mm_castps_si128); - - UNIT_TEST(Test_mm_mul_epu32); - + + UNIT_TEST(Test_mm_cvtepi32_ps); + UNIT_TEST(Test_mm_cvtps_epi32); + UNIT_TEST(Test_mm_cvttps_epi32); + + UNIT_TEST(Test_mm_castsi128_ps); + UNIT_TEST(Test_mm_castps_si128); + + UNIT_TEST(Test_mm_mul_epu32); + UNIT_TEST(Test_mm_cmpunord_ps); UNIT_TEST(Test_mm_andnot_ps); UNIT_TEST(Test_mm_shuffle_ps); @@ -310,36 +310,36 @@ private: UNIT_TEST(Test_mm_rsqrt_ps); UNIT_TEST(Test_matrixnet_powerpc); - UNIT_TEST_SUITE_END(); - -public: - void Test_mm_load_si128(); - void Test_mm_loadu_si128(); + UNIT_TEST_SUITE_END(); + 
+public: + void Test_mm_load_si128(); + void Test_mm_loadu_si128(); void Test_mm_storeu_si128(); void Test_mm_loadu_si128_2(); void Test_mm_loadu_ps(); void Test_mm_storeu_ps(); - - template <typename TElem, int bits, int elemCount, + + template <typename TElem, int bits, int elemCount, typename TFunc, typename TShifter, typename TOp, typename TElemFunc> - void Test_mm_shifter_epiXX(); - + void Test_mm_shifter_epiXX(); + enum class EDirection { Left, Right }; - + struct TShiftRes { __m128i Value[17]; }; void Test_mm_byte_shifter(EDirection direction, std::function<TShiftRes (__m128i)> foo); - void Test_mm_slli_epi16(); - void Test_mm_slli_epi32(); - void Test_mm_slli_epi64(); + void Test_mm_slli_epi16(); + void Test_mm_slli_epi32(); + void Test_mm_slli_epi64(); void Test_mm_slli_si128(); - + void Test_mm_srli_epi16(); void Test_mm_srli_epi32(); void Test_mm_srli_epi64(); @@ -356,134 +356,134 @@ public: void Test_mm_srl_epi32(); void Test_mm_srl_epi64(); - void Test_mm_add_epi8(); - void Test_mm_add_epi16(); - void Test_mm_add_epi32(); - void Test_mm_add_epi64(); - void Test_mm_add_ps(); + void Test_mm_add_epi8(); + void Test_mm_add_epi16(); + void Test_mm_add_epi32(); + void Test_mm_add_epi64(); + void Test_mm_add_ps(); void Test_mm_add_pd(); - + void Test_mm_madd_epi16(); - void Test_mm_sub_epi8(); - void Test_mm_sub_epi16(); - void Test_mm_sub_epi32(); - void Test_mm_sub_epi64(); - void Test_mm_sub_ps(); + void Test_mm_sub_epi8(); + void Test_mm_sub_epi16(); + void Test_mm_sub_epi32(); + void Test_mm_sub_epi64(); + void Test_mm_sub_ps(); void Test_mm_sub_pd(); - - void Test_mm_mul_ps(); + + void Test_mm_mul_ps(); void Test_mm_mul_pd(); - void Test_mm_div_ps(); + void Test_mm_div_ps(); void Test_mm_div_pd(); - void Test_mm_max_ps(); - void Test_mm_min_ps(); - void Test_mm_and_ps(); - - template <typename TElem, int bits, int elemCount, int shift, - typename TFunc, typename TOp> - void Test_mm_unpack_epiXX(); - void Test_mm_unpacklo_epi8(); - void 
Test_mm_unpackhi_epi8(); - void Test_mm_unpacklo_epi16(); - void Test_mm_unpackhi_epi16(); - void Test_mm_unpacklo_epi32(); - void Test_mm_unpackhi_epi32(); - void Test_mm_unpacklo_epi64(); - void Test_mm_unpackhi_epi64(); - - template <typename TElem, unsigned elemCount, - typename TFunc, typename TElemFunc, + void Test_mm_max_ps(); + void Test_mm_min_ps(); + void Test_mm_and_ps(); + + template <typename TElem, int bits, int elemCount, int shift, + typename TFunc, typename TOp> + void Test_mm_unpack_epiXX(); + void Test_mm_unpacklo_epi8(); + void Test_mm_unpackhi_epi8(); + void Test_mm_unpacklo_epi16(); + void Test_mm_unpackhi_epi16(); + void Test_mm_unpacklo_epi32(); + void Test_mm_unpackhi_epi32(); + void Test_mm_unpacklo_epi64(); + void Test_mm_unpackhi_epi64(); + + template <typename TElem, unsigned elemCount, + typename TFunc, typename TElemFunc, typename TOp, typename TVectorType = __m128i> - void Test_mm_dualop(); - - template <typename TElem, unsigned elemCount, - typename TFunc, typename TElemFunc, + void Test_mm_dualop(); + + template <typename TElem, unsigned elemCount, + typename TFunc, typename TElemFunc, typename TOp, typename TVectorType = __m128i> - void Test_mm_dualcmp(); - - void Test_mm_or_si128(); - void Test_mm_and_si128(); - void Test_mm_andnot_si128(); - - void Test_mm_cmpeq_epi8(); - void Test_mm_cmpeq_epi16(); - void Test_mm_cmpeq_epi32(); - void Test_mm_cmpeq_ps(); - - void Test_mm_cmpgt_epi8(); - void Test_mm_cmpgt_epi16(); - void Test_mm_cmpgt_epi32(); - void Test_mm_cmpgt_ps(); - - void Test_mm_cmplt_epi8(); - void Test_mm_cmplt_epi16(); - void Test_mm_cmplt_epi32(); - - template <typename TElem, int elemCount, + void Test_mm_dualcmp(); + + void Test_mm_or_si128(); + void Test_mm_and_si128(); + void Test_mm_andnot_si128(); + + void Test_mm_cmpeq_epi8(); + void Test_mm_cmpeq_epi16(); + void Test_mm_cmpeq_epi32(); + void Test_mm_cmpeq_ps(); + + void Test_mm_cmpgt_epi8(); + void Test_mm_cmpgt_epi16(); + void Test_mm_cmpgt_epi32(); + void 
Test_mm_cmpgt_ps(); + + void Test_mm_cmplt_epi8(); + void Test_mm_cmplt_epi16(); + void Test_mm_cmplt_epi32(); + + template <typename TElem, int elemCount, typename TFunc, typename TOp, typename TVectorType> - void Test_mm_setter_epiXX(); - void Test_mm_set1_epi8(); - void Test_mm_set1_epi16(); - void Test_mm_set1_epi32(); - void Test_mm_set1_ps(); + void Test_mm_setter_epiXX(); + void Test_mm_set1_epi8(); + void Test_mm_set1_epi16(); + void Test_mm_set1_epi32(); + void Test_mm_set1_ps(); void Test_mm_set_ps1(); - - void Test_mm_setzero_si128(); - void Test_mm_setzero_ps(); + + void Test_mm_setzero_si128(); + void Test_mm_setzero_ps(); void Test_mm_setzero_pd(); - - void Test_mm_loadl_epi64(); - void Test_mm_storel_epi64(); - + + void Test_mm_loadl_epi64(); + void Test_mm_storel_epi64(); + void Test_mm_loadl_pd(); void Test_mm_loadh_pd(); void Test_mm_cvtsd_f64(); - void Test_mm_shuffle_epi32(); - void Test_mm_movemask_epi8(); - void Test_mm_cvtsi128_si32(); + void Test_mm_shuffle_epi32(); + void Test_mm_movemask_epi8(); + void Test_mm_cvtsi128_si32(); void Test_mm_cvtsi128_si64(); - - void Test_mm_set_epi16(); - void Test_mm_set_epi32(); - void Test_mm_set_ps(); + + void Test_mm_set_epi16(); + void Test_mm_set_epi32(); + void Test_mm_set_ps(); void Test_mm_set_pd(); - - void Test_mm_cvtsi32_si128(); + + void Test_mm_cvtsi32_si128(); void Test_mm_cvtsi64_si128(); - - template <typename TElem, typename TNarrow, unsigned elemCount, - typename TFunc> - void Test_mm_packs_epiXX(); - void Test_mm_packs_epi16(); - void Test_mm_packs_epi32(); - void Test_mm_packus_epi16(); - - void Test_mm_extract_epi16(); + + template <typename TElem, typename TNarrow, unsigned elemCount, + typename TFunc> + void Test_mm_packs_epiXX(); + void Test_mm_packs_epi16(); + void Test_mm_packs_epi32(); + void Test_mm_packus_epi16(); + + void Test_mm_extract_epi16(); void Test_mm_extract_epi8(); void Test_mm_extract_epi32(); void Test_mm_extract_epi64(); - - void Test_MM_TRANSPOSE4_PS(); - void 
Test_mm_movemask_ps(); + + void Test_MM_TRANSPOSE4_PS(); + void Test_mm_movemask_ps(); void Test_mm_movemask_ps_2(); - - template <typename TFrom, typename TTo, unsigned elemCount, - typename TLoadVector, typename TResultVector, - typename TElemFunc, typename TFunc, typename TOp> - void Test_mm_convertop(); - void Test_mm_cvtepi32_ps(); - void Test_mm_cvtps_epi32(); - void Test_mm_cvttps_epi32(); - - template <typename TLoadVector, typename TCastVector, - typename TFunc, TFunc* func> - void Test_mm_castXX(); - void Test_mm_castsi128_ps(); - void Test_mm_castps_si128(); - - void Test_mm_mul_epu32(); + + template <typename TFrom, typename TTo, unsigned elemCount, + typename TLoadVector, typename TResultVector, + typename TElemFunc, typename TFunc, typename TOp> + void Test_mm_convertop(); + void Test_mm_cvtepi32_ps(); + void Test_mm_cvtps_epi32(); + void Test_mm_cvttps_epi32(); + + template <typename TLoadVector, typename TCastVector, + typename TFunc, TFunc* func> + void Test_mm_castXX(); + void Test_mm_castsi128_ps(); + void Test_mm_castps_si128(); + + void Test_mm_mul_epu32(); void Test_mm_cmpunord_ps(); void Test_mm_store_ss(); @@ -497,30 +497,30 @@ public: void Test_mm_rsqrt_ps(); void Test_mm_rsqrt_ss(); void Test_matrixnet_powerpc(); -}; - -UNIT_TEST_SUITE_REGISTRATION(TSSEEmulTest); - -void TSSEEmulTest::Test_mm_load_si128() { +}; + +UNIT_TEST_SUITE_REGISTRATION(TSSEEmulTest); + +void TSSEEmulTest::Test_mm_load_si128() { alignas(16) char data[16] = { - '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', - '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; - __m128i value = _mm_load_si128((__m128i*)&data); - UNIT_ASSERT_EQUAL(TQType<uint64x2_t>::As(value)[0], 0xAABB2211CCFF00AAUL); - UNIT_ASSERT_EQUAL(TQType<uint64x2_t>::As(value)[1], 0x1C66775588449933UL); -} - -void TSSEEmulTest::Test_mm_loadu_si128() { + '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', + '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', 
'\x1C'}; + __m128i value = _mm_load_si128((__m128i*)&data); + UNIT_ASSERT_EQUAL(TQType<uint64x2_t>::As(value)[0], 0xAABB2211CCFF00AAUL); + UNIT_ASSERT_EQUAL(TQType<uint64x2_t>::As(value)[1], 0x1C66775588449933UL); +} + +void TSSEEmulTest::Test_mm_loadu_si128() { alignas(16) char data[17] = { - '\x66', - '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', - '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; - UNIT_ASSERT((ui64(&data[1]) & 0x1) == 0x1); - __m128i value = _mm_loadu_si128((__m128i*)&data[1]); - UNIT_ASSERT(TQType<uint64x2_t>::As(value)[0] == 0xAABB2211CCFF00AAUL); - UNIT_ASSERT(TQType<uint64x2_t>::As(value)[1] == 0x1C66775588449933UL); -} - + '\x66', + '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', + '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; + UNIT_ASSERT((ui64(&data[1]) & 0x1) == 0x1); + __m128i value = _mm_loadu_si128((__m128i*)&data[1]); + UNIT_ASSERT(TQType<uint64x2_t>::As(value)[0] == 0xAABB2211CCFF00AAUL); + UNIT_ASSERT(TQType<uint64x2_t>::As(value)[1] == 0x1C66775588449933UL); +} + void TSSEEmulTest::Test_mm_storeu_si128() { alignas(16) unsigned char stub[32] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, @@ -640,32 +640,32 @@ unsigned MakeNumber<unsigned>(unsigned number) { return number; } -template <typename TElem, int bits, int elemCount, +template <typename TElem, int bits, int elemCount, typename TFunc, typename TShifter, typename TOp, typename TElemFunc> -void TSSEEmulTest::Test_mm_shifter_epiXX() { - char data[16] = { - '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', - '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; - TElem* dataw = reinterpret_cast<TElem*>(&data); - - __m128i value = _mm_loadu_si128((__m128i*)&data); - +void TSSEEmulTest::Test_mm_shifter_epiXX() { + char data[16] = { + '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', + '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; + TElem* dataw = 
reinterpret_cast<TElem*>(&data); + + __m128i value = _mm_loadu_si128((__m128i*)&data); + for (unsigned shifter = 0; shifter <= bits; ++shifter) { - TElem shiftedData[elemCount]; + TElem shiftedData[elemCount]; for (unsigned i = 0; i < elemCount; ++i) { - shiftedData[i] = TElemFunc::Call(dataw[i], shifter); + shiftedData[i] = TElemFunc::Call(dataw[i], shifter); } - + const TShifter adhoc_shifter = MakeNumber<TShifter>(shifter); __m128i result = TFunc(value, adhoc_shifter); for (unsigned i = 0; i < elemCount; ++i) { - UNIT_ASSERT_EQUAL(shiftedData[i], TQType<TOp>::As(result)[i]); + UNIT_ASSERT_EQUAL(shiftedData[i], TQType<TOp>::As(result)[i]); } - } -} - + } +} + void TSSEEmulTest::Test_mm_byte_shifter(EDirection direction, std::function<TShiftRes (__m128i)> foo) { const char data[48] = { @@ -713,52 +713,52 @@ struct THelperASHR { } }; -template <typename TElem> -struct THelperSHR { - static TElem Call(const TElem op, const int shift) { +template <typename TElem> +struct THelperSHR { + static TElem Call(const TElem op, const int shift) { constexpr int nBitsInOp = sizeof(op) * CHAR_BIT; return shift < nBitsInOp ? 
op >> shift : 0; - } -}; - -void TSSEEmulTest::Test_mm_srli_epi16() { + } +}; + +void TSSEEmulTest::Test_mm_srli_epi16() { Test_mm_shifter_epiXX<ui16, 16, 8, Wrap(_mm_srli_epi16), unsigned, uint16x8_t, - THelperSHR<ui16>>(); -} - -void TSSEEmulTest::Test_mm_srli_epi32() { + THelperSHR<ui16>>(); +} + +void TSSEEmulTest::Test_mm_srli_epi32() { Test_mm_shifter_epiXX<ui32, 32, 4, Wrap(_mm_srli_epi32), unsigned, uint32x4_t, - THelperSHR<ui32>>(); -} - -void TSSEEmulTest::Test_mm_srli_epi64() { + THelperSHR<ui32>>(); +} + +void TSSEEmulTest::Test_mm_srli_epi64() { Test_mm_shifter_epiXX<ui64, 64, 2, Wrap(_mm_srli_epi64), unsigned, uint64x2_t, - THelperSHR<ui64>>(); -} - -template <typename TElem> -struct THelperSHL { - static TElem Call(const TElem op, const int shift) { + THelperSHR<ui64>>(); +} + +template <typename TElem> +struct THelperSHL { + static TElem Call(const TElem op, const int shift) { constexpr int nBitsInOp = sizeof(op) * CHAR_BIT; return shift < nBitsInOp ? op << shift : 0; - } -}; - -void TSSEEmulTest::Test_mm_slli_epi16() { + } +}; + +void TSSEEmulTest::Test_mm_slli_epi16() { Test_mm_shifter_epiXX<ui16, 16, 8, Wrap(_mm_slli_epi16), unsigned, uint16x8_t, - THelperSHL<ui16>>(); -} - -void TSSEEmulTest::Test_mm_slli_epi32() { + THelperSHL<ui16>>(); +} + +void TSSEEmulTest::Test_mm_slli_epi32() { Test_mm_shifter_epiXX<ui32, 32, 4, Wrap(_mm_slli_epi32), unsigned, uint32x4_t, - THelperSHL<ui32>>(); -} - -void TSSEEmulTest::Test_mm_slli_epi64() { + THelperSHL<ui32>>(); +} + +void TSSEEmulTest::Test_mm_slli_epi64() { Test_mm_shifter_epiXX<ui64, 64, 2, Wrap(_mm_slli_epi64), unsigned, uint64x2_t, - THelperSHL<ui64>>(); -} - + THelperSHL<ui64>>(); +} + void TSSEEmulTest::Test_mm_slli_si128() { Test_mm_byte_shifter(EDirection::Left, [] (__m128i a) -> TShiftRes { TShiftRes res; @@ -849,30 +849,30 @@ void TSSEEmulTest::Test_mm_sll_epi64() { THelperSHL<ui64>>(); } -template <typename TElem> -struct THelperAdd { - static TElem Call(const TElem op1, const TElem op2) { - 
return op1 + op2; - } -}; - -void TSSEEmulTest::Test_mm_add_epi16() { - Test_mm_dualop<ui16, 8, Wrap(_mm_add_epi16), THelperAdd<ui16>, uint16x8_t>(); -} - -void TSSEEmulTest::Test_mm_add_epi32() { - Test_mm_dualop<ui32, 4, Wrap(_mm_add_epi32), THelperAdd<ui32>, uint32x4_t>(); -} - -void TSSEEmulTest::Test_mm_add_epi64() { - Test_mm_dualop<ui64, 2, Wrap(_mm_add_epi64), THelperAdd<ui64>, uint64x2_t>(); -} - -void TSSEEmulTest::Test_mm_add_ps() { - Test_mm_dualop<float, 2, WrapF(_mm_add_ps), - THelperAdd<float>, float32x4_t, __m128>(); -} - +template <typename TElem> +struct THelperAdd { + static TElem Call(const TElem op1, const TElem op2) { + return op1 + op2; + } +}; + +void TSSEEmulTest::Test_mm_add_epi16() { + Test_mm_dualop<ui16, 8, Wrap(_mm_add_epi16), THelperAdd<ui16>, uint16x8_t>(); +} + +void TSSEEmulTest::Test_mm_add_epi32() { + Test_mm_dualop<ui32, 4, Wrap(_mm_add_epi32), THelperAdd<ui32>, uint32x4_t>(); +} + +void TSSEEmulTest::Test_mm_add_epi64() { + Test_mm_dualop<ui64, 2, Wrap(_mm_add_epi64), THelperAdd<ui64>, uint64x2_t>(); +} + +void TSSEEmulTest::Test_mm_add_ps() { + Test_mm_dualop<float, 2, WrapF(_mm_add_ps), + THelperAdd<float>, float32x4_t, __m128>(); +} + void TSSEEmulTest::Test_mm_add_pd() { Test_mm_dualop<double, 2, WrapD(_mm_add_pd), THelperAdd<double>, float64x2_t, __m128d>(); @@ -904,44 +904,44 @@ void TSSEEmulTest::Test_mm_madd_epi16() { } -template <typename TElem> -struct THelperSub { - static TElem Call(const TElem op1, const TElem op2) { - return op1 - op2; - } -}; - -void TSSEEmulTest::Test_mm_sub_epi16() { - Test_mm_dualop<ui16, 8, Wrap(_mm_sub_epi16), THelperSub<ui16>, uint16x8_t>(); -} - -void TSSEEmulTest::Test_mm_sub_epi32() { - Test_mm_dualop<ui32, 4, Wrap(_mm_sub_epi32), THelperSub<ui32>, uint32x4_t>(); -} - -void TSSEEmulTest::Test_mm_sub_epi64() { - Test_mm_dualop<ui64, 2, Wrap(_mm_sub_epi64), THelperSub<ui64>, uint64x2_t>(); -} - -void TSSEEmulTest::Test_mm_sub_ps() { - Test_mm_dualop<float, 4, WrapF(_mm_sub_ps), 
THelperSub<float>, - float32x4_t, __m128>(); -} - +template <typename TElem> +struct THelperSub { + static TElem Call(const TElem op1, const TElem op2) { + return op1 - op2; + } +}; + +void TSSEEmulTest::Test_mm_sub_epi16() { + Test_mm_dualop<ui16, 8, Wrap(_mm_sub_epi16), THelperSub<ui16>, uint16x8_t>(); +} + +void TSSEEmulTest::Test_mm_sub_epi32() { + Test_mm_dualop<ui32, 4, Wrap(_mm_sub_epi32), THelperSub<ui32>, uint32x4_t>(); +} + +void TSSEEmulTest::Test_mm_sub_epi64() { + Test_mm_dualop<ui64, 2, Wrap(_mm_sub_epi64), THelperSub<ui64>, uint64x2_t>(); +} + +void TSSEEmulTest::Test_mm_sub_ps() { + Test_mm_dualop<float, 4, WrapF(_mm_sub_ps), THelperSub<float>, + float32x4_t, __m128>(); +} + void TSSEEmulTest::Test_mm_sub_pd() { Test_mm_dualop<double, 2, WrapD(_mm_sub_pd), THelperSub<double>, float64x2_t, __m128d>(); } -void TSSEEmulTest::Test_mm_mul_ps() { - struct THelper { - static float Call(const float op1, const float op2) { - return op1 * op2; - } - }; - Test_mm_dualop<float, 4, WrapF(_mm_mul_ps), THelper, float32x4_t, __m128>(); -} - +void TSSEEmulTest::Test_mm_mul_ps() { + struct THelper { + static float Call(const float op1, const float op2) { + return op1 * op2; + } + }; + Test_mm_dualop<float, 4, WrapF(_mm_mul_ps), THelper, float32x4_t, __m128>(); +} + void TSSEEmulTest::Test_mm_mul_pd() { struct THelper { static double Call(const double op1, const double op2) { @@ -951,15 +951,15 @@ void TSSEEmulTest::Test_mm_mul_pd() { Test_mm_dualop<double, 2, WrapD(_mm_mul_pd), THelper, float64x2_t, __m128d>(); } -void TSSEEmulTest::Test_mm_div_ps() { - struct THelper { - static float Call(const float op1, const float op2) { - return op1 / op2; - } - }; - Test_mm_dualop<float, 4, WrapF(_mm_div_ps), THelper, float32x4_t, __m128>(); -} - +void TSSEEmulTest::Test_mm_div_ps() { + struct THelper { + static float Call(const float op1, const float op2) { + return op1 / op2; + } + }; + Test_mm_dualop<float, 4, WrapF(_mm_div_ps), THelper, float32x4_t, __m128>(); +} + void 
TSSEEmulTest::Test_mm_div_pd() { struct THelper { static double Call(const double op1, const double op2) { @@ -969,441 +969,441 @@ void TSSEEmulTest::Test_mm_div_pd() { Test_mm_dualop<double, 2, WrapD(_mm_div_pd), THelper, float64x2_t, __m128d>(); } -void TSSEEmulTest::Test_mm_max_ps() { - struct THelper { - static float Call(const float op1, const float op2) { - return std::max(op1, op2); - } - }; - Test_mm_dualop<float, 4, WrapF(_mm_max_ps), THelper, float32x4_t, __m128>(); -} - -void TSSEEmulTest::Test_mm_min_ps() { - struct THelper { - static float Call(const float op1, const float op2) { - return std::min(op1, op2); - } - }; - Test_mm_dualop<float, 4, WrapF(_mm_min_ps), THelper, float32x4_t, __m128>(); -} - -void TSSEEmulTest::Test_mm_and_ps() { - struct THelper { - static float Call(const float op1, const float op2) { - union Cast { - unsigned int AsUInt; - float AsFloat; - }; - Cast v1, v2, result; - v1.AsFloat = op1; - v2.AsFloat = op2; - result.AsUInt = v1.AsUInt & v2.AsUInt; - return result.AsFloat; - } - }; - Test_mm_dualcmp<float, 4, WrapF(_mm_and_ps), - THelper, float32x4_t, __m128>(); -} - -template <typename TElem, int bits, int elemCount, int shift, - typename TFunc, typename TOp> -void TSSEEmulTest::Test_mm_unpack_epiXX() { - char data1[16] = { - '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', - '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; - char data2[16] = { - '\x99', '\x33', '\x1C', '\x55', '\x88', '\x66', '\x77', '\x44', - '\x00', '\xAA', '\xAA', '\x11', '\xCC', '\xBB', '\x22', '\xFF'}; - TElem* dataw1 = reinterpret_cast<TElem*>(&data1); - TElem* dataw2 = reinterpret_cast<TElem*>(&data2); - - __m128i value1 = _mm_loadu_si128((__m128i*)&data1); - __m128i value2 = _mm_loadu_si128((__m128i*)&data2); - - TElem zippedData[elemCount]; - for (unsigned i = 0; i < elemCount / 2; ++i) { - zippedData[i * 2] = dataw1[i + shift]; - zippedData[i * 2 + 1] = dataw2[i + shift]; - } - __m128i result = TFunc(value1, value2); - 
- for (unsigned i = 0; i < elemCount / 2; ++i) { - UNIT_ASSERT_EQUAL(zippedData[i * 2], TQType<TOp>::As(result)[i * 2]); - UNIT_ASSERT_EQUAL(zippedData[i * 2 + 1], - TQType<TOp>::As(result)[i * 2 + 1]); - } -} - -void TSSEEmulTest::Test_mm_unpacklo_epi8() { - Test_mm_unpack_epiXX<ui8, 8, 16, 0, Wrap(_mm_unpacklo_epi8), uint8x16_t>(); -} - -void TSSEEmulTest::Test_mm_unpackhi_epi8() { - Test_mm_unpack_epiXX<ui8, 8, 16, 8, Wrap(_mm_unpackhi_epi8), uint8x16_t>(); -} - -void TSSEEmulTest::Test_mm_unpacklo_epi16() { - Test_mm_unpack_epiXX<ui16, 16, 8, 0, Wrap(_mm_unpacklo_epi16), uint16x8_t>(); -} - -void TSSEEmulTest::Test_mm_unpackhi_epi16() { - Test_mm_unpack_epiXX<ui16, 16, 8, 4, Wrap(_mm_unpackhi_epi16), uint16x8_t>(); -} - -void TSSEEmulTest::Test_mm_unpacklo_epi32() { - Test_mm_unpack_epiXX<ui32, 32, 4, 0, Wrap(_mm_unpacklo_epi32), uint32x4_t>(); -} - -void TSSEEmulTest::Test_mm_unpackhi_epi32() { - Test_mm_unpack_epiXX<ui32, 32, 4, 2, Wrap(_mm_unpackhi_epi32), uint32x4_t>(); -} - -void TSSEEmulTest::Test_mm_unpacklo_epi64() { - Test_mm_unpack_epiXX<ui64, 64, 2, 0, Wrap(_mm_unpacklo_epi64), uint64x2_t>(); -} - -void TSSEEmulTest::Test_mm_unpackhi_epi64() { - Test_mm_unpack_epiXX<ui64, 64, 2, 1, Wrap(_mm_unpackhi_epi64), uint64x2_t>(); -} - -template <typename TElem, unsigned elemCount, - typename TFunc, typename TElemFunc, +void TSSEEmulTest::Test_mm_max_ps() { + struct THelper { + static float Call(const float op1, const float op2) { + return std::max(op1, op2); + } + }; + Test_mm_dualop<float, 4, WrapF(_mm_max_ps), THelper, float32x4_t, __m128>(); +} + +void TSSEEmulTest::Test_mm_min_ps() { + struct THelper { + static float Call(const float op1, const float op2) { + return std::min(op1, op2); + } + }; + Test_mm_dualop<float, 4, WrapF(_mm_min_ps), THelper, float32x4_t, __m128>(); +} + +void TSSEEmulTest::Test_mm_and_ps() { + struct THelper { + static float Call(const float op1, const float op2) { + union Cast { + unsigned int AsUInt; + float AsFloat; + }; + Cast 
v1, v2, result; + v1.AsFloat = op1; + v2.AsFloat = op2; + result.AsUInt = v1.AsUInt & v2.AsUInt; + return result.AsFloat; + } + }; + Test_mm_dualcmp<float, 4, WrapF(_mm_and_ps), + THelper, float32x4_t, __m128>(); +} + +template <typename TElem, int bits, int elemCount, int shift, + typename TFunc, typename TOp> +void TSSEEmulTest::Test_mm_unpack_epiXX() { + char data1[16] = { + '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', + '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; + char data2[16] = { + '\x99', '\x33', '\x1C', '\x55', '\x88', '\x66', '\x77', '\x44', + '\x00', '\xAA', '\xAA', '\x11', '\xCC', '\xBB', '\x22', '\xFF'}; + TElem* dataw1 = reinterpret_cast<TElem*>(&data1); + TElem* dataw2 = reinterpret_cast<TElem*>(&data2); + + __m128i value1 = _mm_loadu_si128((__m128i*)&data1); + __m128i value2 = _mm_loadu_si128((__m128i*)&data2); + + TElem zippedData[elemCount]; + for (unsigned i = 0; i < elemCount / 2; ++i) { + zippedData[i * 2] = dataw1[i + shift]; + zippedData[i * 2 + 1] = dataw2[i + shift]; + } + __m128i result = TFunc(value1, value2); + + for (unsigned i = 0; i < elemCount / 2; ++i) { + UNIT_ASSERT_EQUAL(zippedData[i * 2], TQType<TOp>::As(result)[i * 2]); + UNIT_ASSERT_EQUAL(zippedData[i * 2 + 1], + TQType<TOp>::As(result)[i * 2 + 1]); + } +} + +void TSSEEmulTest::Test_mm_unpacklo_epi8() { + Test_mm_unpack_epiXX<ui8, 8, 16, 0, Wrap(_mm_unpacklo_epi8), uint8x16_t>(); +} + +void TSSEEmulTest::Test_mm_unpackhi_epi8() { + Test_mm_unpack_epiXX<ui8, 8, 16, 8, Wrap(_mm_unpackhi_epi8), uint8x16_t>(); +} + +void TSSEEmulTest::Test_mm_unpacklo_epi16() { + Test_mm_unpack_epiXX<ui16, 16, 8, 0, Wrap(_mm_unpacklo_epi16), uint16x8_t>(); +} + +void TSSEEmulTest::Test_mm_unpackhi_epi16() { + Test_mm_unpack_epiXX<ui16, 16, 8, 4, Wrap(_mm_unpackhi_epi16), uint16x8_t>(); +} + +void TSSEEmulTest::Test_mm_unpacklo_epi32() { + Test_mm_unpack_epiXX<ui32, 32, 4, 0, Wrap(_mm_unpacklo_epi32), uint32x4_t>(); +} + +void 
TSSEEmulTest::Test_mm_unpackhi_epi32() { + Test_mm_unpack_epiXX<ui32, 32, 4, 2, Wrap(_mm_unpackhi_epi32), uint32x4_t>(); +} + +void TSSEEmulTest::Test_mm_unpacklo_epi64() { + Test_mm_unpack_epiXX<ui64, 64, 2, 0, Wrap(_mm_unpacklo_epi64), uint64x2_t>(); +} + +void TSSEEmulTest::Test_mm_unpackhi_epi64() { + Test_mm_unpack_epiXX<ui64, 64, 2, 1, Wrap(_mm_unpackhi_epi64), uint64x2_t>(); +} + +template <typename TElem, unsigned elemCount, + typename TFunc, typename TElemFunc, typename TOp, typename TVectorType> -void TSSEEmulTest::Test_mm_dualop() { - char data1[16] = { - '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', - '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; - char data2[16] = { - '\x99', '\x33', '\x1C', '\x55', '\x88', '\x66', '\x77', '\x44', - '\x00', '\xAA', '\xAA', '\x11', '\xCC', '\xBB', '\x22', '\xFF'}; - TElem* dataw1 = reinterpret_cast<TElem*>(&data1); - TElem* dataw2 = reinterpret_cast<TElem*>(&data2); - +void TSSEEmulTest::Test_mm_dualop() { + char data1[16] = { + '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', + '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; + char data2[16] = { + '\x99', '\x33', '\x1C', '\x55', '\x88', '\x66', '\x77', '\x44', + '\x00', '\xAA', '\xAA', '\x11', '\xCC', '\xBB', '\x22', '\xFF'}; + TElem* dataw1 = reinterpret_cast<TElem*>(&data1); + TElem* dataw2 = reinterpret_cast<TElem*>(&data2); + TVectorType value1 = TFuncLoad<TVectorType>(&data1); TVectorType value2 = TFuncLoad<TVectorType>(&data2); - - TElem procData[elemCount]; - for (unsigned i = 0; i < elemCount; ++i) { - procData[i] = TElemFunc::Call(dataw1[i], dataw2[i]); - } + + TElem procData[elemCount]; + for (unsigned i = 0; i < elemCount; ++i) { + procData[i] = TElemFunc::Call(dataw1[i], dataw2[i]); + } TVectorType result = TFunc(value1, value2); - - for (unsigned i = 0; i < elemCount; ++i) { - UNIT_ASSERT_EQUAL(procData[i], TQType<TOp>::As(result)[i]); - } -} - -/* This is almost the same as 
Test_mm_dualop, - but different data1 and data2 */ -template <typename TElem, unsigned elemCount, - typename TFunc, typename TElemFunc, + + for (unsigned i = 0; i < elemCount; ++i) { + UNIT_ASSERT_EQUAL(procData[i], TQType<TOp>::As(result)[i]); + } +} + +/* This is almost the same as Test_mm_dualop, + but different data1 and data2 */ +template <typename TElem, unsigned elemCount, + typename TFunc, typename TElemFunc, typename TOp, typename TVectorType> -void TSSEEmulTest::Test_mm_dualcmp() { - char data1[16] = { - '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x66', '\x77', '\xAA', - '\x33', '\x99', '\x44', '\x88', '\xCC', '\xBB', '\x66', '\x1C'}; - char data2[16] = { - '\x99', '\x33', '\xFF', '\xCC', '\x88', '\x66', '\x77', '\x44', - '\x33', '\x99', '\x44', '\x88', '\xCC', '\xBB', '\x22', '\xFF'}; - TElem* dataw1 = reinterpret_cast<TElem*>(&data1); - TElem* dataw2 = reinterpret_cast<TElem*>(&data2); - +void TSSEEmulTest::Test_mm_dualcmp() { + char data1[16] = { + '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x66', '\x77', '\xAA', + '\x33', '\x99', '\x44', '\x88', '\xCC', '\xBB', '\x66', '\x1C'}; + char data2[16] = { + '\x99', '\x33', '\xFF', '\xCC', '\x88', '\x66', '\x77', '\x44', + '\x33', '\x99', '\x44', '\x88', '\xCC', '\xBB', '\x22', '\xFF'}; + TElem* dataw1 = reinterpret_cast<TElem*>(&data1); + TElem* dataw2 = reinterpret_cast<TElem*>(&data2); + TVectorType value1 = TFuncLoad<TVectorType>(&data1); TVectorType value2 = TFuncLoad<TVectorType>(&data2); - - TElem procData[elemCount]; - for (unsigned i = 0; i < elemCount; ++i) { - procData[i] = TElemFunc::Call(dataw1[i], dataw2[i]); - } + + TElem procData[elemCount]; + for (unsigned i = 0; i < elemCount; ++i) { + procData[i] = TElemFunc::Call(dataw1[i], dataw2[i]); + } TVectorType result = TFunc(value1, value2); - - for (unsigned i = 0; i < elemCount; ++i) { - /* memcmp is for compare to invalid floats in results */ + + for (unsigned i = 0; i < elemCount; ++i) { + /* memcmp is for compare to invalid floats in results */ 
const TElem value = TQType<TOp>::As(result)[i]; UNIT_ASSERT(memcmp(&(procData[i]), &value, sizeof(TElem)) == 0); - } -} - -void TSSEEmulTest::Test_mm_or_si128() { - struct THelper { - static ui64 Call(const ui64 op1, const ui64 op2) { - return op1 | op2; - } - }; - - Test_mm_dualop<ui64, 2, Wrap(_mm_or_si128), THelper, uint64x2_t>(); -} - -void TSSEEmulTest::Test_mm_and_si128() { - struct THelper { - static ui64 Call(const ui64 op1, const ui64 op2) { - return op1 & op2; - } - }; - - Test_mm_dualop<ui64, 2, Wrap(_mm_and_si128), THelper, uint64x2_t>(); -} - -void TSSEEmulTest::Test_mm_andnot_si128() { - struct THelper { - static ui64 Call(const ui64 op1, const ui64 op2) { - return (~op1) & op2; - } - }; - - Test_mm_dualop<ui64, 2, Wrap(_mm_andnot_si128), THelper, uint64x2_t>(); -} - -template <typename TElem> -struct THelperCMPEQ { - static TElem Call(const TElem op1, const TElem op2) { - return op1 == op2 ? ~TElem(0) : TElem(0); - } -}; - -void TSSEEmulTest::Test_mm_cmpeq_epi8() { - Test_mm_dualcmp<ui8, 16, Wrap(_mm_cmpeq_epi8), - THelperCMPEQ<ui8>, uint8x16_t>(); -} - -void TSSEEmulTest::Test_mm_cmpeq_epi16() { - Test_mm_dualcmp<ui16, 8, Wrap(_mm_cmpeq_epi16), - THelperCMPEQ<ui16>, uint16x8_t>(); -} - -void TSSEEmulTest::Test_mm_cmpeq_epi32() { - Test_mm_dualcmp<ui32, 4, Wrap(_mm_cmpeq_epi32), - THelperCMPEQ<ui32>, uint32x4_t>(); -} - -void TSSEEmulTest::Test_mm_cmpeq_ps() { - struct THelperFloat { - static float Call(const float op1, const float op2) { - union Cast { - unsigned int AsUInt; - float AsFloat; - }; - Cast value; - value.AsUInt = op1 == op2 ? 0xFFFFFFFF : 0; - return value.AsFloat; - } - }; - - Test_mm_dualcmp<float, 4, WrapF(_mm_cmpeq_ps), - THelperFloat, float32x4_t, __m128>(); -} - -template <typename TElem> -struct THelperCMPGT { - static TElem Call(const TElem op1, const TElem op2) { - return op1 > op2 ? 
~TElem(0) : TElem(0); - } -}; - -void TSSEEmulTest::Test_mm_cmpgt_epi8() { - Test_mm_dualcmp<i8, 16, Wrap(_mm_cmpgt_epi8), - THelperCMPGT<i8>, int8x16_t>(); -} - -void TSSEEmulTest::Test_mm_cmpgt_epi16() { - Test_mm_dualcmp<i16, 8, Wrap(_mm_cmpgt_epi16), - THelperCMPGT<i16>, int16x8_t>(); -} - -void TSSEEmulTest::Test_mm_cmpgt_epi32() { - Test_mm_dualcmp<i32, 4, Wrap(_mm_cmpgt_epi32), - THelperCMPGT<i32>, int32x4_t>(); -} - -void TSSEEmulTest::Test_mm_cmpgt_ps() { - struct THelperFloat { - static float Call(const float op1, const float op2) { - union Cast { - unsigned int AsUInt; - float AsFloat; - }; - Cast value; - value.AsUInt = op1 > op2 ? 0xFFFFFFFF : 0; - return value.AsFloat; - } - }; - - Test_mm_dualcmp<float, 4, WrapF(_mm_cmpgt_ps), - THelperFloat, float32x4_t, __m128>(); -} - -template <typename TElem> -struct THelperCMPLT { - static TElem Call(const TElem op1, const TElem op2) { - return op1 < op2 ? ~TElem(0) : TElem(0); - } -}; - -void TSSEEmulTest::Test_mm_cmplt_epi8() { - Test_mm_dualcmp<i8, 16, Wrap(_mm_cmplt_epi8), - THelperCMPLT<i8>, int8x16_t>(); -} - -void TSSEEmulTest::Test_mm_cmplt_epi16() { - Test_mm_dualcmp<i16, 8, Wrap(_mm_cmplt_epi16), - THelperCMPLT<i16>, int16x8_t>(); -} - -void TSSEEmulTest::Test_mm_cmplt_epi32() { - Test_mm_dualcmp<i32, 4, Wrap(_mm_cmplt_epi32), - THelperCMPLT<i32>, int32x4_t>(); -} - -template <typename TElem, int elemCount, + } +} + +void TSSEEmulTest::Test_mm_or_si128() { + struct THelper { + static ui64 Call(const ui64 op1, const ui64 op2) { + return op1 | op2; + } + }; + + Test_mm_dualop<ui64, 2, Wrap(_mm_or_si128), THelper, uint64x2_t>(); +} + +void TSSEEmulTest::Test_mm_and_si128() { + struct THelper { + static ui64 Call(const ui64 op1, const ui64 op2) { + return op1 & op2; + } + }; + + Test_mm_dualop<ui64, 2, Wrap(_mm_and_si128), THelper, uint64x2_t>(); +} + +void TSSEEmulTest::Test_mm_andnot_si128() { + struct THelper { + static ui64 Call(const ui64 op1, const ui64 op2) { + return (~op1) & op2; + } + }; + + 
Test_mm_dualop<ui64, 2, Wrap(_mm_andnot_si128), THelper, uint64x2_t>(); +} + +template <typename TElem> +struct THelperCMPEQ { + static TElem Call(const TElem op1, const TElem op2) { + return op1 == op2 ? ~TElem(0) : TElem(0); + } +}; + +void TSSEEmulTest::Test_mm_cmpeq_epi8() { + Test_mm_dualcmp<ui8, 16, Wrap(_mm_cmpeq_epi8), + THelperCMPEQ<ui8>, uint8x16_t>(); +} + +void TSSEEmulTest::Test_mm_cmpeq_epi16() { + Test_mm_dualcmp<ui16, 8, Wrap(_mm_cmpeq_epi16), + THelperCMPEQ<ui16>, uint16x8_t>(); +} + +void TSSEEmulTest::Test_mm_cmpeq_epi32() { + Test_mm_dualcmp<ui32, 4, Wrap(_mm_cmpeq_epi32), + THelperCMPEQ<ui32>, uint32x4_t>(); +} + +void TSSEEmulTest::Test_mm_cmpeq_ps() { + struct THelperFloat { + static float Call(const float op1, const float op2) { + union Cast { + unsigned int AsUInt; + float AsFloat; + }; + Cast value; + value.AsUInt = op1 == op2 ? 0xFFFFFFFF : 0; + return value.AsFloat; + } + }; + + Test_mm_dualcmp<float, 4, WrapF(_mm_cmpeq_ps), + THelperFloat, float32x4_t, __m128>(); +} + +template <typename TElem> +struct THelperCMPGT { + static TElem Call(const TElem op1, const TElem op2) { + return op1 > op2 ? ~TElem(0) : TElem(0); + } +}; + +void TSSEEmulTest::Test_mm_cmpgt_epi8() { + Test_mm_dualcmp<i8, 16, Wrap(_mm_cmpgt_epi8), + THelperCMPGT<i8>, int8x16_t>(); +} + +void TSSEEmulTest::Test_mm_cmpgt_epi16() { + Test_mm_dualcmp<i16, 8, Wrap(_mm_cmpgt_epi16), + THelperCMPGT<i16>, int16x8_t>(); +} + +void TSSEEmulTest::Test_mm_cmpgt_epi32() { + Test_mm_dualcmp<i32, 4, Wrap(_mm_cmpgt_epi32), + THelperCMPGT<i32>, int32x4_t>(); +} + +void TSSEEmulTest::Test_mm_cmpgt_ps() { + struct THelperFloat { + static float Call(const float op1, const float op2) { + union Cast { + unsigned int AsUInt; + float AsFloat; + }; + Cast value; + value.AsUInt = op1 > op2 ? 
0xFFFFFFFF : 0; + return value.AsFloat; + } + }; + + Test_mm_dualcmp<float, 4, WrapF(_mm_cmpgt_ps), + THelperFloat, float32x4_t, __m128>(); +} + +template <typename TElem> +struct THelperCMPLT { + static TElem Call(const TElem op1, const TElem op2) { + return op1 < op2 ? ~TElem(0) : TElem(0); + } +}; + +void TSSEEmulTest::Test_mm_cmplt_epi8() { + Test_mm_dualcmp<i8, 16, Wrap(_mm_cmplt_epi8), + THelperCMPLT<i8>, int8x16_t>(); +} + +void TSSEEmulTest::Test_mm_cmplt_epi16() { + Test_mm_dualcmp<i16, 8, Wrap(_mm_cmplt_epi16), + THelperCMPLT<i16>, int16x8_t>(); +} + +void TSSEEmulTest::Test_mm_cmplt_epi32() { + Test_mm_dualcmp<i32, 4, Wrap(_mm_cmplt_epi32), + THelperCMPLT<i32>, int32x4_t>(); +} + +template <typename TElem, int elemCount, typename TFunc, typename TOp, typename TVectorType> -void TSSEEmulTest::Test_mm_setter_epiXX() { - char data[64] = { - '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', - '\x33', '\x99', '\x44', '\x00', '\x55', '\x77', '\x66', '\x1C', - '\x99', '\x33', '\x1C', '\x55', '\x88', '\x66', '\x77', '\x44', - '\x00', '\xAA', '\xAA', '\x11', '\xCC', '\xBB', '\x22', '\xFF', - '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x00', '\x00', '\x00', - '\x33', '\x99', '\x44', '\x88', '\xCC', '\xBB', '\x66', '\x1C', - '\x99', '\x33', '\xFF', '\xCC', '\x88', '\x66', '\x77', '\x44', - '\x33', '\x99', '\x44', '\x88', '\xCC', '\xBB', '\x22', '\xFF'}; - TElem* dataw = reinterpret_cast<TElem*>(&data); - - for (unsigned dataItem = 0; dataItem < elemCount * 4; ++dataItem) { +void TSSEEmulTest::Test_mm_setter_epiXX() { + char data[64] = { + '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', + '\x33', '\x99', '\x44', '\x00', '\x55', '\x77', '\x66', '\x1C', + '\x99', '\x33', '\x1C', '\x55', '\x88', '\x66', '\x77', '\x44', + '\x00', '\xAA', '\xAA', '\x11', '\xCC', '\xBB', '\x22', '\xFF', + '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x00', '\x00', '\x00', + '\x33', '\x99', '\x44', '\x88', '\xCC', '\xBB', '\x66', '\x1C', + '\x99', '\x33', '\xFF', 
'\xCC', '\x88', '\x66', '\x77', '\x44', + '\x33', '\x99', '\x44', '\x88', '\xCC', '\xBB', '\x22', '\xFF'}; + TElem* dataw = reinterpret_cast<TElem*>(&data); + + for (unsigned dataItem = 0; dataItem < elemCount * 4; ++dataItem) { TVectorType value = TFunc(dataw[dataItem]); - - for (unsigned i = 0; i < elemCount; ++i) - UNIT_ASSERT_EQUAL(dataw[dataItem], TQType<TOp>::As(value)[i]); - } -} - -void TSSEEmulTest::Test_mm_set1_epi8() { - Test_mm_setter_epiXX<i8, 16, Wrap(_mm_set1_epi8), int8x16_t, __m128i>(); -} -void TSSEEmulTest::Test_mm_set1_epi16() { - Test_mm_setter_epiXX<i16, 8, Wrap(_mm_set1_epi16), int16x8_t, __m128i>(); -} -void TSSEEmulTest::Test_mm_set1_epi32() { - Test_mm_setter_epiXX<i32, 4, Wrap(_mm_set1_epi32), int32x4_t, __m128i>(); -} -void TSSEEmulTest::Test_mm_set1_ps() { - Test_mm_setter_epiXX<float, 4, WrapF(_mm_set1_ps), float32x4_t, __m128>(); -} - + + for (unsigned i = 0; i < elemCount; ++i) + UNIT_ASSERT_EQUAL(dataw[dataItem], TQType<TOp>::As(value)[i]); + } +} + +void TSSEEmulTest::Test_mm_set1_epi8() { + Test_mm_setter_epiXX<i8, 16, Wrap(_mm_set1_epi8), int8x16_t, __m128i>(); +} +void TSSEEmulTest::Test_mm_set1_epi16() { + Test_mm_setter_epiXX<i16, 8, Wrap(_mm_set1_epi16), int16x8_t, __m128i>(); +} +void TSSEEmulTest::Test_mm_set1_epi32() { + Test_mm_setter_epiXX<i32, 4, Wrap(_mm_set1_epi32), int32x4_t, __m128i>(); +} +void TSSEEmulTest::Test_mm_set1_ps() { + Test_mm_setter_epiXX<float, 4, WrapF(_mm_set1_ps), float32x4_t, __m128>(); +} + void TSSEEmulTest::Test_mm_set_ps1() { Test_mm_setter_epiXX<float, 4, WrapF(_mm_set_ps1), float32x4_t, __m128>(); } -void TSSEEmulTest::Test_mm_setzero_si128() { - __m128i value = _mm_setzero_si128(); - for (unsigned i = 0; i < 4; ++i) - UNIT_ASSERT_EQUAL(0, TQType<uint32x4_t>::As(value)[i]); -} - -void TSSEEmulTest::Test_mm_setzero_ps() { - __m128 value = _mm_setzero_ps(); - for (unsigned i = 0; i < 4; ++i) - UNIT_ASSERT_EQUAL(0.0, TQType<float32x4_t>::As(value)[i]); -} - +void 
TSSEEmulTest::Test_mm_setzero_si128() { + __m128i value = _mm_setzero_si128(); + for (unsigned i = 0; i < 4; ++i) + UNIT_ASSERT_EQUAL(0, TQType<uint32x4_t>::As(value)[i]); +} + +void TSSEEmulTest::Test_mm_setzero_ps() { + __m128 value = _mm_setzero_ps(); + for (unsigned i = 0; i < 4; ++i) + UNIT_ASSERT_EQUAL(0.0, TQType<float32x4_t>::As(value)[i]); +} + void TSSEEmulTest::Test_mm_setzero_pd() { __m128d value = _mm_setzero_pd(); for (unsigned i = 0; i < 2; ++i) UNIT_ASSERT_EQUAL(0.0, TQType<float64x2_t>::As(value)[i]); } -void TSSEEmulTest::Test_mm_loadl_epi64() { - char data[64] = { - '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', - '\x33', '\x99', '\x44', '\x00', '\x55', '\x77', '\x66', '\x1C', - '\x99', '\x33', '\x1C', '\x55', '\x88', '\x66', '\x77', '\x44', - '\x00', '\xAA', '\xAA', '\x11', '\xCC', '\xBB', '\x22', '\xFF', - '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x00', '\x00', '\x00', - '\x33', '\x99', '\x44', '\x88', '\xCC', '\xBB', '\x66', '\x1C', - '\x99', '\x33', '\xFF', '\xCC', '\x88', '\x66', '\x77', '\x44', - '\x33', '\x99', '\x44', '\x88', '\xCC', '\xBB', '\x22', '\xFF'}; - ui64* dataw = reinterpret_cast<ui64*>(&data); - - for (unsigned dataItem = 0; dataItem < 8; ++dataItem) { - __m128i value = _mm_loadl_epi64((__m128i const*)&dataw[dataItem]); - - UNIT_ASSERT_EQUAL(dataw[dataItem], TQType<uint64x2_t>::As(value)[0]); - UNIT_ASSERT_EQUAL(0, TQType<uint64x2_t>::As(value)[1]); - } -} - -void TSSEEmulTest::Test_mm_storel_epi64() { - char data[64] = { - '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', - '\x33', '\x99', '\x44', '\x00', '\x55', '\x77', '\x66', '\x1C', - '\x99', '\x33', '\x1C', '\x55', '\x88', '\x66', '\x77', '\x44', - '\x00', '\xAA', '\xAA', '\x11', '\xCC', '\xBB', '\x22', '\xFF', - '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x00', '\x00', '\x00', - '\x33', '\x99', '\x44', '\x88', '\xCC', '\xBB', '\x66', '\x1C', - '\x99', '\x33', '\xFF', '\xCC', '\x88', '\x66', '\x77', '\x44', - '\x33', '\x99', '\x44', '\x88', 
'\xCC', '\xBB', '\x22', '\xFF'}; - ui64* dataw = reinterpret_cast<ui64*>(&data); - - for (unsigned dataItem = 0; dataItem < 4; ++dataItem) { - __m128i value = _mm_loadu_si128((__m128i*)&dataw[dataItem * 2]); - - ui64 buf[2] = {55, 81}; - _mm_storel_epi64((__m128i*)&buf, value); - - UNIT_ASSERT_EQUAL(dataw[dataItem * 2], buf[0]); - UNIT_ASSERT_EQUAL(81, buf[1]); - } -} - -void TSSEEmulTest::Test_mm_shuffle_epi32() { - char data[16] = { - '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', - '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; - ui32* dataw = reinterpret_cast<ui32*>(&data); - __m128i value = _mm_loadu_si128((__m128i*)&data); - - int coding[4] = {1, 3, 0, 2}; - __m128i result = _mm_shuffle_epi32(value, _MM_SHUFFLE(2, 0, 3, 1)); - - for (unsigned i = 0; i < 4; ++i) - UNIT_ASSERT_EQUAL(dataw[coding[i]], - TQType<uint32x4_t>::As(result)[i]); -} - -static int GetHighBitAt(char data, int at) { - ui8 udata = data & 0x80; - return int(udata >> 7) << at; -} - -void TSSEEmulTest::Test_mm_movemask_epi8() { - char data[16] = { - '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', - '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; - __m128i value = _mm_loadu_si128((__m128i*)&data); - - int result = _mm_movemask_epi8(value); - int verify = 0; - for (unsigned i = 0; i < 16; ++i) { - verify |= GetHighBitAt(data[i], i); - } - - UNIT_ASSERT_EQUAL(result, verify); -} - -void TSSEEmulTest::Test_mm_movemask_ps() { - char data[16] = { - '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', - '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; - __m128 value = _mm_loadu_ps((float*)&data); - - int result = _mm_movemask_ps(value); - int verify = 0; - for (unsigned i = 0; i < 4; ++i) { - verify |= GetHighBitAt(data[i * 4 + 3], i); - } - - UNIT_ASSERT_EQUAL(result, verify); -} - +void TSSEEmulTest::Test_mm_loadl_epi64() { + char data[64] = { + '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', 
'\xAA', + '\x33', '\x99', '\x44', '\x00', '\x55', '\x77', '\x66', '\x1C', + '\x99', '\x33', '\x1C', '\x55', '\x88', '\x66', '\x77', '\x44', + '\x00', '\xAA', '\xAA', '\x11', '\xCC', '\xBB', '\x22', '\xFF', + '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x00', '\x00', '\x00', + '\x33', '\x99', '\x44', '\x88', '\xCC', '\xBB', '\x66', '\x1C', + '\x99', '\x33', '\xFF', '\xCC', '\x88', '\x66', '\x77', '\x44', + '\x33', '\x99', '\x44', '\x88', '\xCC', '\xBB', '\x22', '\xFF'}; + ui64* dataw = reinterpret_cast<ui64*>(&data); + + for (unsigned dataItem = 0; dataItem < 8; ++dataItem) { + __m128i value = _mm_loadl_epi64((__m128i const*)&dataw[dataItem]); + + UNIT_ASSERT_EQUAL(dataw[dataItem], TQType<uint64x2_t>::As(value)[0]); + UNIT_ASSERT_EQUAL(0, TQType<uint64x2_t>::As(value)[1]); + } +} + +void TSSEEmulTest::Test_mm_storel_epi64() { + char data[64] = { + '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', + '\x33', '\x99', '\x44', '\x00', '\x55', '\x77', '\x66', '\x1C', + '\x99', '\x33', '\x1C', '\x55', '\x88', '\x66', '\x77', '\x44', + '\x00', '\xAA', '\xAA', '\x11', '\xCC', '\xBB', '\x22', '\xFF', + '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x00', '\x00', '\x00', + '\x33', '\x99', '\x44', '\x88', '\xCC', '\xBB', '\x66', '\x1C', + '\x99', '\x33', '\xFF', '\xCC', '\x88', '\x66', '\x77', '\x44', + '\x33', '\x99', '\x44', '\x88', '\xCC', '\xBB', '\x22', '\xFF'}; + ui64* dataw = reinterpret_cast<ui64*>(&data); + + for (unsigned dataItem = 0; dataItem < 4; ++dataItem) { + __m128i value = _mm_loadu_si128((__m128i*)&dataw[dataItem * 2]); + + ui64 buf[2] = {55, 81}; + _mm_storel_epi64((__m128i*)&buf, value); + + UNIT_ASSERT_EQUAL(dataw[dataItem * 2], buf[0]); + UNIT_ASSERT_EQUAL(81, buf[1]); + } +} + +void TSSEEmulTest::Test_mm_shuffle_epi32() { + char data[16] = { + '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', + '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; + ui32* dataw = reinterpret_cast<ui32*>(&data); + __m128i value = 
_mm_loadu_si128((__m128i*)&data); + + int coding[4] = {1, 3, 0, 2}; + __m128i result = _mm_shuffle_epi32(value, _MM_SHUFFLE(2, 0, 3, 1)); + + for (unsigned i = 0; i < 4; ++i) + UNIT_ASSERT_EQUAL(dataw[coding[i]], + TQType<uint32x4_t>::As(result)[i]); +} + +static int GetHighBitAt(char data, int at) { + ui8 udata = data & 0x80; + return int(udata >> 7) << at; +} + +void TSSEEmulTest::Test_mm_movemask_epi8() { + char data[16] = { + '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', + '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; + __m128i value = _mm_loadu_si128((__m128i*)&data); + + int result = _mm_movemask_epi8(value); + int verify = 0; + for (unsigned i = 0; i < 16; ++i) { + verify |= GetHighBitAt(data[i], i); + } + + UNIT_ASSERT_EQUAL(result, verify); +} + +void TSSEEmulTest::Test_mm_movemask_ps() { + char data[16] = { + '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', + '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; + __m128 value = _mm_loadu_ps((float*)&data); + + int result = _mm_movemask_ps(value); + int verify = 0; + for (unsigned i = 0; i < 4; ++i) { + verify |= GetHighBitAt(data[i * 4 + 3], i); + } + + UNIT_ASSERT_EQUAL(result, verify); +} + void TSSEEmulTest::Test_mm_movemask_ps_2() { char data[16] = { '\xFF', '\xFF', '\xFF', '\xFF', '\xFF', '\xFF', '\xFF', '\xFF', @@ -1414,19 +1414,19 @@ void TSSEEmulTest::Test_mm_movemask_ps_2() { UNIT_ASSERT_EQUAL(result, 0xf); } -void TSSEEmulTest::Test_mm_cvtsi128_si32() { - char data[16] = { - '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', - '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; - __m128i value = _mm_loadu_si128((__m128i*)&data); - - int result = _mm_cvtsi128_si32(value); - i32* datap = reinterpret_cast<i32*>(&data); - int verify = datap[0]; - - UNIT_ASSERT_EQUAL(result, verify); -} - +void TSSEEmulTest::Test_mm_cvtsi128_si32() { + char data[16] = { + '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', 
'\xAA', + '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; + __m128i value = _mm_loadu_si128((__m128i*)&data); + + int result = _mm_cvtsi128_si32(value); + i32* datap = reinterpret_cast<i32*>(&data); + int verify = datap[0]; + + UNIT_ASSERT_EQUAL(result, verify); +} + void TSSEEmulTest::Test_mm_cvtsi128_si64() { char data[16] = { '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', @@ -1440,52 +1440,52 @@ void TSSEEmulTest::Test_mm_cvtsi128_si64() { UNIT_ASSERT_EQUAL(result, verify); } -void TSSEEmulTest::Test_mm_set_epi16() { - char data[16] = { - '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', - '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; - i16* dataw = reinterpret_cast<i16*>(&data); - ui64* dataq = reinterpret_cast<ui64*>(&data); - - __m128i result = _mm_set_epi16(dataw[7], dataw[6], dataw[5], dataw[4], - dataw[3], dataw[2], dataw[1], dataw[0]); - ui64 buf[2] = {53, 81}; - _mm_storeu_si128((__m128i*)&buf, result); - - UNIT_ASSERT_EQUAL(buf[0], dataq[0]); - UNIT_ASSERT_EQUAL(buf[1], dataq[1]); -} - -void TSSEEmulTest::Test_mm_set_epi32() { - char data[16] = { - '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', - '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; - i32* dataw = reinterpret_cast<i32*>(&data); - ui64* dataq = reinterpret_cast<ui64*>(&data); - - __m128i result = _mm_set_epi32(dataw[3], dataw[2], dataw[1], dataw[0]); - ui64 buf[2] = {53, 81}; - _mm_storeu_si128((__m128i*)&buf, result); - - UNIT_ASSERT_EQUAL(buf[0], dataq[0]); - UNIT_ASSERT_EQUAL(buf[1], dataq[1]); -} - -void TSSEEmulTest::Test_mm_set_ps() { - char data[16] = { - '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', - '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; - float* dataw = reinterpret_cast<float*>(&data); - ui64* dataq = reinterpret_cast<ui64*>(&data); - - __m128 result = _mm_set_ps(dataw[3], dataw[2], dataw[1], dataw[0]); - ui64 buf[2] = {53, 81}; - 
_mm_storeu_ps((float*)&buf, result); - - UNIT_ASSERT_EQUAL(buf[0], dataq[0]); - UNIT_ASSERT_EQUAL(buf[1], dataq[1]); -} - +void TSSEEmulTest::Test_mm_set_epi16() { + char data[16] = { + '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', + '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; + i16* dataw = reinterpret_cast<i16*>(&data); + ui64* dataq = reinterpret_cast<ui64*>(&data); + + __m128i result = _mm_set_epi16(dataw[7], dataw[6], dataw[5], dataw[4], + dataw[3], dataw[2], dataw[1], dataw[0]); + ui64 buf[2] = {53, 81}; + _mm_storeu_si128((__m128i*)&buf, result); + + UNIT_ASSERT_EQUAL(buf[0], dataq[0]); + UNIT_ASSERT_EQUAL(buf[1], dataq[1]); +} + +void TSSEEmulTest::Test_mm_set_epi32() { + char data[16] = { + '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', + '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; + i32* dataw = reinterpret_cast<i32*>(&data); + ui64* dataq = reinterpret_cast<ui64*>(&data); + + __m128i result = _mm_set_epi32(dataw[3], dataw[2], dataw[1], dataw[0]); + ui64 buf[2] = {53, 81}; + _mm_storeu_si128((__m128i*)&buf, result); + + UNIT_ASSERT_EQUAL(buf[0], dataq[0]); + UNIT_ASSERT_EQUAL(buf[1], dataq[1]); +} + +void TSSEEmulTest::Test_mm_set_ps() { + char data[16] = { + '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', + '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; + float* dataw = reinterpret_cast<float*>(&data); + ui64* dataq = reinterpret_cast<ui64*>(&data); + + __m128 result = _mm_set_ps(dataw[3], dataw[2], dataw[1], dataw[0]); + ui64 buf[2] = {53, 81}; + _mm_storeu_ps((float*)&buf, result); + + UNIT_ASSERT_EQUAL(buf[0], dataq[0]); + UNIT_ASSERT_EQUAL(buf[1], dataq[1]); +} + void TSSEEmulTest::Test_mm_set_pd() { char data[16] = { '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', @@ -1501,22 +1501,22 @@ void TSSEEmulTest::Test_mm_set_pd() { UNIT_ASSERT_EQUAL(buf[1], dataq[1]); } -void TSSEEmulTest::Test_mm_cvtsi32_si128() { - char data[16] 
= { - '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', - '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; - i32* dataw = reinterpret_cast<i32*>(&data); - - __m128i result = _mm_cvtsi32_si128(dataw[0]); - i32 buf[4] = {53, 81, -43, 2132}; - _mm_storeu_si128((__m128i*)&buf, result); - - UNIT_ASSERT_EQUAL(buf[0], dataw[0]); - UNIT_ASSERT_EQUAL(buf[1], 0); - UNIT_ASSERT_EQUAL(buf[2], 0); - UNIT_ASSERT_EQUAL(buf[3], 0); -} - +void TSSEEmulTest::Test_mm_cvtsi32_si128() { + char data[16] = { + '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', + '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; + i32* dataw = reinterpret_cast<i32*>(&data); + + __m128i result = _mm_cvtsi32_si128(dataw[0]); + i32 buf[4] = {53, 81, -43, 2132}; + _mm_storeu_si128((__m128i*)&buf, result); + + UNIT_ASSERT_EQUAL(buf[0], dataw[0]); + UNIT_ASSERT_EQUAL(buf[1], 0); + UNIT_ASSERT_EQUAL(buf[2], 0); + UNIT_ASSERT_EQUAL(buf[3], 0); +} + void TSSEEmulTest::Test_mm_cvtsi64_si128() { char data[16] = { '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', @@ -1531,44 +1531,44 @@ void TSSEEmulTest::Test_mm_cvtsi64_si128() { UNIT_ASSERT_EQUAL(buf[1], 0); } -template <typename TElem, typename TNarrow, unsigned elemCount, typename TFunc> -void TSSEEmulTest::Test_mm_packs_epiXX() { - char data[32] = { - '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', - '\x33', '\x99', '\x44', '\x88', '\x55', '\x00', '\x66', '\x1C', - '\x99', '\x33', '\x1C', '\x55', '\x00', '\x00', '\x00', '\x00', - '\x00', '\xAA', '\x00', '\x00', '\xCC', '\xBB', '\x22', '\xFF'}; - __m128i value0 = _mm_loadu_si128((__m128i*)&data); - __m128i value1 = _mm_loadu_si128(((__m128i*)&data) + 1); - TElem* dataw = reinterpret_cast<TElem*>(&data); - - __m128i result = TFunc(value0, value1); - - TNarrow verify[elemCount]; - for (unsigned i = 0; i < elemCount; ++i) { - TElem sum = dataw[i]; - if (sum > std::numeric_limits<TNarrow>::max()) - sum = 
std::numeric_limits<TNarrow>::max(); - if (sum < std::numeric_limits<TNarrow>::min()) - sum = std::numeric_limits<TNarrow>::min(); - verify[i] = TNarrow(sum); - } - - ui64* verifyp = (ui64*)&verify; - UNIT_ASSERT_EQUAL(verifyp[0], TQType<uint64x2_t>::As(result)[0]); - UNIT_ASSERT_EQUAL(verifyp[1], TQType<uint64x2_t>::As(result)[1]); -} - -void TSSEEmulTest::Test_mm_packs_epi16() { - Test_mm_packs_epiXX<i16, i8, 16, Wrap(_mm_packs_epi16)>(); -} -void TSSEEmulTest::Test_mm_packs_epi32() { - Test_mm_packs_epiXX<i32, i16, 8, Wrap(_mm_packs_epi32)>(); -} -void TSSEEmulTest::Test_mm_packus_epi16() { - Test_mm_packs_epiXX<i16, ui8, 16, Wrap(_mm_packus_epi16)>(); -} - +template <typename TElem, typename TNarrow, unsigned elemCount, typename TFunc> +void TSSEEmulTest::Test_mm_packs_epiXX() { + char data[32] = { + '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', + '\x33', '\x99', '\x44', '\x88', '\x55', '\x00', '\x66', '\x1C', + '\x99', '\x33', '\x1C', '\x55', '\x00', '\x00', '\x00', '\x00', + '\x00', '\xAA', '\x00', '\x00', '\xCC', '\xBB', '\x22', '\xFF'}; + __m128i value0 = _mm_loadu_si128((__m128i*)&data); + __m128i value1 = _mm_loadu_si128(((__m128i*)&data) + 1); + TElem* dataw = reinterpret_cast<TElem*>(&data); + + __m128i result = TFunc(value0, value1); + + TNarrow verify[elemCount]; + for (unsigned i = 0; i < elemCount; ++i) { + TElem sum = dataw[i]; + if (sum > std::numeric_limits<TNarrow>::max()) + sum = std::numeric_limits<TNarrow>::max(); + if (sum < std::numeric_limits<TNarrow>::min()) + sum = std::numeric_limits<TNarrow>::min(); + verify[i] = TNarrow(sum); + } + + ui64* verifyp = (ui64*)&verify; + UNIT_ASSERT_EQUAL(verifyp[0], TQType<uint64x2_t>::As(result)[0]); + UNIT_ASSERT_EQUAL(verifyp[1], TQType<uint64x2_t>::As(result)[1]); +} + +void TSSEEmulTest::Test_mm_packs_epi16() { + Test_mm_packs_epiXX<i16, i8, 16, Wrap(_mm_packs_epi16)>(); +} +void TSSEEmulTest::Test_mm_packs_epi32() { + Test_mm_packs_epiXX<i32, i16, 8, Wrap(_mm_packs_epi32)>(); +} 
+void TSSEEmulTest::Test_mm_packus_epi16() { + Test_mm_packs_epiXX<i16, ui8, 16, Wrap(_mm_packus_epi16)>(); +} + void TSSEEmulTest::Test_mm_extract_epi8() { alignas(16) char data[16] = { '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', @@ -1594,23 +1594,23 @@ void TSSEEmulTest::Test_mm_extract_epi8() { UNIT_ASSERT_EQUAL((_mm_extract_epi8(value, 15)), int(dataw[15])); } -void TSSEEmulTest::Test_mm_extract_epi16() { +void TSSEEmulTest::Test_mm_extract_epi16() { alignas(16) char data[16] = { - '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', - '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; + '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', + '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; const ui16* dataw = reinterpret_cast<const ui16*>(&data); const __m128i value = _mm_loadu_si128((__m128i*)&data); - - UNIT_ASSERT_EQUAL((_mm_extract_epi16(value, 0)), int(dataw[0])); - UNIT_ASSERT_EQUAL((_mm_extract_epi16(value, 1)), int(dataw[1])); - UNIT_ASSERT_EQUAL((_mm_extract_epi16(value, 2)), int(dataw[2])); - UNIT_ASSERT_EQUAL((_mm_extract_epi16(value, 3)), int(dataw[3])); - UNIT_ASSERT_EQUAL((_mm_extract_epi16(value, 4)), int(dataw[4])); - UNIT_ASSERT_EQUAL((_mm_extract_epi16(value, 5)), int(dataw[5])); - UNIT_ASSERT_EQUAL((_mm_extract_epi16(value, 6)), int(dataw[6])); - UNIT_ASSERT_EQUAL((_mm_extract_epi16(value, 7)), int(dataw[7])); -} - + + UNIT_ASSERT_EQUAL((_mm_extract_epi16(value, 0)), int(dataw[0])); + UNIT_ASSERT_EQUAL((_mm_extract_epi16(value, 1)), int(dataw[1])); + UNIT_ASSERT_EQUAL((_mm_extract_epi16(value, 2)), int(dataw[2])); + UNIT_ASSERT_EQUAL((_mm_extract_epi16(value, 3)), int(dataw[3])); + UNIT_ASSERT_EQUAL((_mm_extract_epi16(value, 4)), int(dataw[4])); + UNIT_ASSERT_EQUAL((_mm_extract_epi16(value, 5)), int(dataw[5])); + UNIT_ASSERT_EQUAL((_mm_extract_epi16(value, 6)), int(dataw[6])); + UNIT_ASSERT_EQUAL((_mm_extract_epi16(value, 7)), int(dataw[7])); +} + void 
TSSEEmulTest::Test_mm_extract_epi64() { alignas(16) char data[16] = { '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', @@ -1635,160 +1635,160 @@ void TSSEEmulTest::Test_mm_extract_epi32() { UNIT_ASSERT_EQUAL((_mm_extract_epi32(value, 3)), int(dataw[3])); } -void TSSEEmulTest::Test_MM_TRANSPOSE4_PS() { - char data0[16] = { - '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', - '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; - char data1[16] = { - '\x99', '\x33', '\x1C', '\x55', '\x88', '\x66', '\x77', '\x44', - '\x00', '\xAA', '\xAA', '\x11', '\xCC', '\xBB', '\x22', '\xFF'}; - char data2[16] = { - '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', - '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; - char data3[16] = { - '\x99', '\x33', '\x1C', '\x55', '\x88', '\x66', '\x77', '\x44', - '\x00', '\xAA', '\xAA', '\x11', '\xCC', '\xBB', '\x22', '\xFF'}; - - __m128 value0 = _mm_loadu_ps((float*)&data0); - __m128 value1 = _mm_loadu_ps((float*)&data1); - __m128 value2 = _mm_loadu_ps((float*)&data2); - __m128 value3 = _mm_loadu_ps((float*)&data3); - - _MM_TRANSPOSE4_PS(value0, value1, value2, value3); - - ui64 tbuf0[2] = {0, 0}; - ui64 tbuf1[2] = {0, 0}; - ui64 tbuf2[2] = {0, 0}; - ui64 tbuf3[2] = {0, 0}; - - _mm_storeu_ps((float*)&tbuf0, value0); - _mm_storeu_ps((float*)&tbuf1, value1); - _mm_storeu_ps((float*)&tbuf2, value2); - _mm_storeu_ps((float*)&tbuf3, value3); - - char tdata0[16] = { - '\xAA', '\x00', '\xFF', '\xCC', '\x99', '\x33', '\x1C', '\x55', - '\xAA', '\x00', '\xFF', '\xCC', '\x99', '\x33', '\x1C', '\x55'}; - char tdata1[16] = { - '\x11', '\x22', '\xBB', '\xAA', '\x88', '\x66', '\x77', '\x44', - '\x11', '\x22', '\xBB', '\xAA', '\x88', '\x66', '\x77', '\x44'}; - char tdata2[16] = { - '\x33', '\x99', '\x44', '\x88', '\x00', '\xAA', '\xAA', '\x11', - '\x33', '\x99', '\x44', '\x88', '\x00', '\xAA', '\xAA', '\x11'}; - char tdata3[16] = { - '\x55', '\x77', '\x66', '\x1C', '\xCC', '\xBB', 
'\x22', '\xFF', - '\x55', '\x77', '\x66', '\x1C', '\xCC', '\xBB', '\x22', '\xFF'}; - - UNIT_ASSERT(memcmp(tbuf0, tdata0, 16) == 0); - UNIT_ASSERT(memcmp(tbuf1, tdata1, 16) == 0); - UNIT_ASSERT(memcmp(tbuf2, tdata2, 16) == 0); - UNIT_ASSERT(memcmp(tbuf3, tdata3, 16) == 0); -} - -template <typename TFrom, typename TTo, unsigned elemCount, - typename TLoadVector, typename TResultVector, - typename TElemFunc, typename TFunc, typename TOp> -void TSSEEmulTest::Test_mm_convertop() { - char data[16] = { - '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', - '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; - TFrom* datap = reinterpret_cast<TFrom*>(&data); - - TLoadVector value = TFuncLoad<TLoadVector>(&data); - - TTo procData[elemCount]; - for (unsigned i = 0; i < elemCount; ++i) { - procData[i] = TElemFunc::Call(datap[i]); - } - - TResultVector result = TFunc(value); - - for (unsigned i = 0; i < elemCount; ++i) { - UNIT_ASSERT_EQUAL(procData[i], TQType<TOp>::As(result)[i]); - } -} - -void TSSEEmulTest::Test_mm_cvtepi32_ps() { - struct THelper { - static float Call(const i32 op) { - return float(op); - } - }; - Test_mm_convertop<i32, float, 4, __m128i, __m128, - THelper, WrapF(_mm_cvtepi32_ps), float32x4_t>(); -}; - -void TSSEEmulTest::Test_mm_cvtps_epi32() { - struct THelper { - static i32 Call(const float op) { - return i32(op); - } - }; - Test_mm_convertop<float, i32, 4, __m128, __m128i, +void TSSEEmulTest::Test_MM_TRANSPOSE4_PS() { + char data0[16] = { + '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', + '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; + char data1[16] = { + '\x99', '\x33', '\x1C', '\x55', '\x88', '\x66', '\x77', '\x44', + '\x00', '\xAA', '\xAA', '\x11', '\xCC', '\xBB', '\x22', '\xFF'}; + char data2[16] = { + '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', + '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; + char data3[16] = { + '\x99', '\x33', '\x1C', '\x55', 
'\x88', '\x66', '\x77', '\x44', + '\x00', '\xAA', '\xAA', '\x11', '\xCC', '\xBB', '\x22', '\xFF'}; + + __m128 value0 = _mm_loadu_ps((float*)&data0); + __m128 value1 = _mm_loadu_ps((float*)&data1); + __m128 value2 = _mm_loadu_ps((float*)&data2); + __m128 value3 = _mm_loadu_ps((float*)&data3); + + _MM_TRANSPOSE4_PS(value0, value1, value2, value3); + + ui64 tbuf0[2] = {0, 0}; + ui64 tbuf1[2] = {0, 0}; + ui64 tbuf2[2] = {0, 0}; + ui64 tbuf3[2] = {0, 0}; + + _mm_storeu_ps((float*)&tbuf0, value0); + _mm_storeu_ps((float*)&tbuf1, value1); + _mm_storeu_ps((float*)&tbuf2, value2); + _mm_storeu_ps((float*)&tbuf3, value3); + + char tdata0[16] = { + '\xAA', '\x00', '\xFF', '\xCC', '\x99', '\x33', '\x1C', '\x55', + '\xAA', '\x00', '\xFF', '\xCC', '\x99', '\x33', '\x1C', '\x55'}; + char tdata1[16] = { + '\x11', '\x22', '\xBB', '\xAA', '\x88', '\x66', '\x77', '\x44', + '\x11', '\x22', '\xBB', '\xAA', '\x88', '\x66', '\x77', '\x44'}; + char tdata2[16] = { + '\x33', '\x99', '\x44', '\x88', '\x00', '\xAA', '\xAA', '\x11', + '\x33', '\x99', '\x44', '\x88', '\x00', '\xAA', '\xAA', '\x11'}; + char tdata3[16] = { + '\x55', '\x77', '\x66', '\x1C', '\xCC', '\xBB', '\x22', '\xFF', + '\x55', '\x77', '\x66', '\x1C', '\xCC', '\xBB', '\x22', '\xFF'}; + + UNIT_ASSERT(memcmp(tbuf0, tdata0, 16) == 0); + UNIT_ASSERT(memcmp(tbuf1, tdata1, 16) == 0); + UNIT_ASSERT(memcmp(tbuf2, tdata2, 16) == 0); + UNIT_ASSERT(memcmp(tbuf3, tdata3, 16) == 0); +} + +template <typename TFrom, typename TTo, unsigned elemCount, + typename TLoadVector, typename TResultVector, + typename TElemFunc, typename TFunc, typename TOp> +void TSSEEmulTest::Test_mm_convertop() { + char data[16] = { + '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', + '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; + TFrom* datap = reinterpret_cast<TFrom*>(&data); + + TLoadVector value = TFuncLoad<TLoadVector>(&data); + + TTo procData[elemCount]; + for (unsigned i = 0; i < elemCount; ++i) { + procData[i] = 
TElemFunc::Call(datap[i]); + } + + TResultVector result = TFunc(value); + + for (unsigned i = 0; i < elemCount; ++i) { + UNIT_ASSERT_EQUAL(procData[i], TQType<TOp>::As(result)[i]); + } +} + +void TSSEEmulTest::Test_mm_cvtepi32_ps() { + struct THelper { + static float Call(const i32 op) { + return float(op); + } + }; + Test_mm_convertop<i32, float, 4, __m128i, __m128, + THelper, WrapF(_mm_cvtepi32_ps), float32x4_t>(); +}; + +void TSSEEmulTest::Test_mm_cvtps_epi32() { + struct THelper { + static i32 Call(const float op) { + return i32(op); + } + }; + Test_mm_convertop<float, i32, 4, __m128, __m128i, THelper, T_mm_CallWrapper<__m128i, decltype(_mm_cvtps_epi32), _mm_cvtps_epi32>, int32x4_t>(); -}; - -void TSSEEmulTest::Test_mm_cvttps_epi32() { - struct THelper { - static i32 Call(const float op) { - return i32(op); - } - }; - Test_mm_convertop<float, i32, 4, __m128, __m128i, - THelper, Wrap(_mm_cvttps_epi32), int32x4_t>(); -}; - -template <typename TLoadVector, typename TCastVector, - typename TFunc, TFunc* func> -void TSSEEmulTest::Test_mm_castXX() { - char data[16] = { - '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', - '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; - - TLoadVector value = TFuncLoad<TLoadVector>(&data); - const TLoadVector constvalue = TFuncLoad<TLoadVector>(&data); - TCastVector casted = func(value); - const TCastVector constcasted = func(constvalue); - char verify[16]; - char constverify[16]; - TFuncStore<TCastVector>(&verify, casted); - TFuncStore<TCastVector>(&constverify, constcasted); - - UNIT_ASSERT(memcmp(&data, &verify, 16) == 0); - UNIT_ASSERT(memcmp(&data, &constverify, 16) == 0); -}; - -void TSSEEmulTest::Test_mm_castsi128_ps() { - Test_mm_castXX<__m128i, __m128, - decltype(_mm_castsi128_ps), _mm_castsi128_ps>(); -} - -void TSSEEmulTest::Test_mm_castps_si128() { - Test_mm_castXX<__m128, __m128i, - decltype(_mm_castps_si128), _mm_castps_si128>(); -} - -void TSSEEmulTest::Test_mm_mul_epu32() { - char 
data0[16] = { - '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', - '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; - char data1[16] = { - '\x99', '\x33', '\x1C', '\x55', '\x88', '\x66', '\x77', '\x44', - '\x00', '\xAA', '\xAA', '\x11', '\xCC', '\xBB', '\x22', '\xFF'}; - ui32* dataw0 = reinterpret_cast<ui32*>(&data0); - ui32* dataw1 = reinterpret_cast<ui32*>(&data1); - - __m128i value0 = _mm_loadu_si128((__m128i*)&data0); - __m128i value1 = _mm_loadu_si128((__m128i*)&data1); - +}; + +void TSSEEmulTest::Test_mm_cvttps_epi32() { + struct THelper { + static i32 Call(const float op) { + return i32(op); + } + }; + Test_mm_convertop<float, i32, 4, __m128, __m128i, + THelper, Wrap(_mm_cvttps_epi32), int32x4_t>(); +}; + +template <typename TLoadVector, typename TCastVector, + typename TFunc, TFunc* func> +void TSSEEmulTest::Test_mm_castXX() { + char data[16] = { + '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', + '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; + + TLoadVector value = TFuncLoad<TLoadVector>(&data); + const TLoadVector constvalue = TFuncLoad<TLoadVector>(&data); + TCastVector casted = func(value); + const TCastVector constcasted = func(constvalue); + char verify[16]; + char constverify[16]; + TFuncStore<TCastVector>(&verify, casted); + TFuncStore<TCastVector>(&constverify, constcasted); + + UNIT_ASSERT(memcmp(&data, &verify, 16) == 0); + UNIT_ASSERT(memcmp(&data, &constverify, 16) == 0); +}; + +void TSSEEmulTest::Test_mm_castsi128_ps() { + Test_mm_castXX<__m128i, __m128, + decltype(_mm_castsi128_ps), _mm_castsi128_ps>(); +} + +void TSSEEmulTest::Test_mm_castps_si128() { + Test_mm_castXX<__m128, __m128i, + decltype(_mm_castps_si128), _mm_castps_si128>(); +} + +void TSSEEmulTest::Test_mm_mul_epu32() { + char data0[16] = { + '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', + '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; + char data1[16] = { + '\x99', '\x33', 
'\x1C', '\x55', '\x88', '\x66', '\x77', '\x44', + '\x00', '\xAA', '\xAA', '\x11', '\xCC', '\xBB', '\x22', '\xFF'}; + ui32* dataw0 = reinterpret_cast<ui32*>(&data0); + ui32* dataw1 = reinterpret_cast<ui32*>(&data1); + + __m128i value0 = _mm_loadu_si128((__m128i*)&data0); + __m128i value1 = _mm_loadu_si128((__m128i*)&data1); + ui64 mul0 = (ui64) dataw0[0] * (ui64) dataw1[0]; ui64 mul1 = (ui64) dataw0[2] * (ui64) dataw1[2]; - - __m128i result = _mm_mul_epu32(value0, value1); - - UNIT_ASSERT_EQUAL(mul0, TQType<uint64x2_t>::As(result)[0]); - UNIT_ASSERT_EQUAL(mul1, TQType<uint64x2_t>::As(result)[1]); -} + + __m128i result = _mm_mul_epu32(value0, value1); + + UNIT_ASSERT_EQUAL(mul0, TQType<uint64x2_t>::As(result)[0]); + UNIT_ASSERT_EQUAL(mul1, TQType<uint64x2_t>::As(result)[1]); +} void TSSEEmulTest::Test_mm_cmpunord_ps() { alignas(16) float valuesBits[4] = {1.f, 2.f, 3.f, 4.f}; diff --git a/library/cpp/sse/ut/ya.make b/library/cpp/sse/ut/ya.make index 45e104971e..14cac6727a 100644 --- a/library/cpp/sse/ut/ya.make +++ b/library/cpp/sse/ut/ya.make @@ -1,13 +1,13 @@ UNITTEST_FOR(library/cpp/sse) - + OWNER(danlark) - -SRCS( + +SRCS( test.cpp -) - +) + IF (ARCH_X86_64) CFLAGS(-msse4.1 -msse4.2) ENDIF() -END() +END() diff --git a/library/cpp/testing/unittest/registar.h b/library/cpp/testing/unittest/registar.h index 44517a0092..28256b53f2 100644 --- a/library/cpp/testing/unittest/registar.h +++ b/library/cpp/testing/unittest/registar.h @@ -279,8 +279,8 @@ private: \ } \ \ virtual void Execute() override { \ - this->AtStart(); - + this->AtStart(); + #ifndef UT_SKIP_EXCEPTIONS #define CATCH_REACTION(FN, e, context) this->AddError(("(" + TypeName(e) + ") " + e.what()).data(), context) #define CATCH_REACTION_BT(FN, e, context) this->AddError(("(" + TypeName(e) + ") " + e.what()).data(), (e.BackTrace() ? 
e.BackTrace()->PrintToString() : TString()), context) diff --git a/library/cpp/testing/unittest/utmain.cpp b/library/cpp/testing/unittest/utmain.cpp index 305bc6b40f..cec11773ed 100644 --- a/library/cpp/testing/unittest/utmain.cpp +++ b/library/cpp/testing/unittest/utmain.cpp @@ -207,7 +207,7 @@ public: } else { TString suite = TString(name).substr(0, colon); EnabledSuites_.insert(suite); - EnabledSuites_.insert(name); + EnabledSuites_.insert(name); EnabledTests_.insert(name); EnabledTests_.insert(TString() + name + "::*"); } diff --git a/library/cpp/threading/light_rw_lock/bench/lightrwlock_test.cpp b/library/cpp/threading/light_rw_lock/bench/lightrwlock_test.cpp index c3027ea544..5e217c25ad 100644 --- a/library/cpp/threading/light_rw_lock/bench/lightrwlock_test.cpp +++ b/library/cpp/threading/light_rw_lock/bench/lightrwlock_test.cpp @@ -1,188 +1,188 @@ #include <library/cpp/threading/light_rw_lock/lightrwlock.h> -#include <util/random/random.h> - -#ifdef _linux_ -// Light rw lock is implemented only for linux - -using namespace NS_LightRWLock; - -#include <pthread.h> -#include <stdlib.h> -#include <stdio.h> - -#define LIGHT - -#ifdef RWSPINLOCK +#include <util/random/random.h> + +#ifdef _linux_ +// Light rw lock is implemented only for linux + +using namespace NS_LightRWLock; + +#include <pthread.h> +#include <stdlib.h> +#include <stdio.h> + +#define LIGHT + +#ifdef RWSPINLOCK #include <library/cpp/lwtrace/rwspinlock.h> -#endif - -#define CHECK_LOGIC 1 -#define LOOPCOUNT 1000000 -#define RANRCOUNT 100 -#define THREADCOUNT 40 -#define WRITELOCKS 100 - -#if defined(_MSC_VER) -static int Y_FORCE_INLINE AtomicFetchAdd(volatile int& item, int value) { - return _InterlockedExchangeAdd((&item, value); -} -#elif defined(__GNUC__) -#else +#endif + +#define CHECK_LOGIC 1 +#define LOOPCOUNT 1000000 +#define RANRCOUNT 100 +#define THREADCOUNT 40 +#define WRITELOCKS 100 + +#if defined(_MSC_VER) +static int Y_FORCE_INLINE AtomicFetchAdd(volatile int& item, int value) { + 
return _InterlockedExchangeAdd((&item, value); +} +#elif defined(__GNUC__) +#else #error unsupported platform -#endif - -class TPosixRWLock { -public: - TPosixRWLock() { - } - - ~TPosixRWLock() { - pthread_rwlock_destroy(&rwlock); - } - - TPosixRWLock(const TPosixRWLock&) = delete; - void operator=(const TPosixRWLock&) = delete; - -private: - pthread_rwlock_t rwlock = PTHREAD_RWLOCK_INITIALIZER; - friend class TPosixRWShareLocker; - friend class TPosixRWExclusiveLocker; -}; - -#if defined(LIGHT) -TLightRWLock __attribute__((aligned(64))) rwlock; -#elif defined(POSIX) -TPosixRWLock rwlock; -#elif defined(RWSPINLOCK) -TRWSpinLock __attribute__((aligned(64))) rwlock; -#else -#error "define lock type" -#endif - -volatile __attribute__((aligned(64))) int checkIt = 0; -volatile int checkExcl = 0; - -class TPosixRWShareLocker { -public: - TPosixRWShareLocker(TPosixRWLock& lock) - : LockP_(&lock) - { - pthread_rwlock_rdlock(&LockP_->rwlock); - } - - ~TPosixRWShareLocker() { - pthread_rwlock_unlock(&LockP_->rwlock); - } - - TPosixRWShareLocker(const TPosixRWShareLocker&) = delete; - void operator=(const TPosixRWShareLocker&) = delete; - -private: - TPosixRWLock* LockP_; -}; - -class TPosixRWExclusiveLocker { -public: - TPosixRWExclusiveLocker(TPosixRWLock& lock) - : LockP_(&lock) - { - pthread_rwlock_wrlock(&LockP_->rwlock); - } - - ~TPosixRWExclusiveLocker() { - pthread_rwlock_unlock(&LockP_->rwlock); - } - TPosixRWExclusiveLocker(const TPosixRWExclusiveLocker&) = delete; - void operator=(const TPosixRWExclusiveLocker&) = delete; - -private: - TPosixRWLock* LockP_; -}; - -template <typename TLocker, bool excl> -static Y_FORCE_INLINE void Run() { - TLocker lockIt(rwlock); - -#if defined(CHECK_LOGIC) && CHECK_LOGIC - if (!excl && checkExcl == 1) { - printf("there is a bug\n"); - } - - int result = AtomicFetchAdd(checkIt, 1); - if (excl) - checkExcl = 1; - - if (excl && result > 1) - printf("there is a bug\n"); -#endif - - for (unsigned w = 0; w < RANRCOUNT; ++w) - 
RandomNumber<ui32>(); - -#if defined(CHECK_LOGIC) && CHECK_LOGIC - if (excl) - checkExcl = 0; - - AtomicFetchAdd(checkIt, -1); -#endif -} - -#ifdef LIGHT -static void* fast_thread_start(__attribute__((unused)) void* arg) { - for (unsigned q = 0; q < LOOPCOUNT; ++q) { - char excl = (RandomNumber<ui32>() % WRITELOCKS) == 0; - if (excl) - Run<TLightWriteGuard, 1>(); - else - Run<TLightReadGuard, 0>(); - } - return NULL; -} -#endif - -#ifdef POSIX -static void* fast_thread_start(__attribute__((unused)) void* arg) { - for (unsigned q = 0; q < LOOPCOUNT; ++q) { - char excl = (RandomNumber<ui32>() % WRITELOCKS) == 0; - if (excl) - Run<TPosixRWExclusiveLocker, 1>(); - else - Run<TPosixRWShareLocker, 0>(); - } - return NULL; -} -#endif - -#ifdef RWSPINLOCK -static void* fast_thread_start(__attribute__((unused)) void* arg) { - for (unsigned q = 0; q < LOOPCOUNT; ++q) { - char excl = (RandomNumber<ui32>() % WRITELOCKS) == 0; - if (excl) - Run<TWriteSpinLockGuard, 1>(); - else - Run<TReadSpinLockGuard, 0>(); - } - return NULL; -} -#endif - -int main() { - pthread_t threads[THREADCOUNT]; - - for (unsigned q = 0; q < THREADCOUNT; ++q) { - pthread_create(&(threads[q]), NULL, &fast_thread_start, NULL); - } - - for (unsigned q = 0; q < THREADCOUNT; ++q) - pthread_join(threads[q], NULL); - - return 0; -} - -#else // !_linux_ - -int main() { - return 0; -} - -#endif +#endif + +class TPosixRWLock { +public: + TPosixRWLock() { + } + + ~TPosixRWLock() { + pthread_rwlock_destroy(&rwlock); + } + + TPosixRWLock(const TPosixRWLock&) = delete; + void operator=(const TPosixRWLock&) = delete; + +private: + pthread_rwlock_t rwlock = PTHREAD_RWLOCK_INITIALIZER; + friend class TPosixRWShareLocker; + friend class TPosixRWExclusiveLocker; +}; + +#if defined(LIGHT) +TLightRWLock __attribute__((aligned(64))) rwlock; +#elif defined(POSIX) +TPosixRWLock rwlock; +#elif defined(RWSPINLOCK) +TRWSpinLock __attribute__((aligned(64))) rwlock; +#else +#error "define lock type" +#endif + +volatile 
__attribute__((aligned(64))) int checkIt = 0; +volatile int checkExcl = 0; + +class TPosixRWShareLocker { +public: + TPosixRWShareLocker(TPosixRWLock& lock) + : LockP_(&lock) + { + pthread_rwlock_rdlock(&LockP_->rwlock); + } + + ~TPosixRWShareLocker() { + pthread_rwlock_unlock(&LockP_->rwlock); + } + + TPosixRWShareLocker(const TPosixRWShareLocker&) = delete; + void operator=(const TPosixRWShareLocker&) = delete; + +private: + TPosixRWLock* LockP_; +}; + +class TPosixRWExclusiveLocker { +public: + TPosixRWExclusiveLocker(TPosixRWLock& lock) + : LockP_(&lock) + { + pthread_rwlock_wrlock(&LockP_->rwlock); + } + + ~TPosixRWExclusiveLocker() { + pthread_rwlock_unlock(&LockP_->rwlock); + } + TPosixRWExclusiveLocker(const TPosixRWExclusiveLocker&) = delete; + void operator=(const TPosixRWExclusiveLocker&) = delete; + +private: + TPosixRWLock* LockP_; +}; + +template <typename TLocker, bool excl> +static Y_FORCE_INLINE void Run() { + TLocker lockIt(rwlock); + +#if defined(CHECK_LOGIC) && CHECK_LOGIC + if (!excl && checkExcl == 1) { + printf("there is a bug\n"); + } + + int result = AtomicFetchAdd(checkIt, 1); + if (excl) + checkExcl = 1; + + if (excl && result > 1) + printf("there is a bug\n"); +#endif + + for (unsigned w = 0; w < RANRCOUNT; ++w) + RandomNumber<ui32>(); + +#if defined(CHECK_LOGIC) && CHECK_LOGIC + if (excl) + checkExcl = 0; + + AtomicFetchAdd(checkIt, -1); +#endif +} + +#ifdef LIGHT +static void* fast_thread_start(__attribute__((unused)) void* arg) { + for (unsigned q = 0; q < LOOPCOUNT; ++q) { + char excl = (RandomNumber<ui32>() % WRITELOCKS) == 0; + if (excl) + Run<TLightWriteGuard, 1>(); + else + Run<TLightReadGuard, 0>(); + } + return NULL; +} +#endif + +#ifdef POSIX +static void* fast_thread_start(__attribute__((unused)) void* arg) { + for (unsigned q = 0; q < LOOPCOUNT; ++q) { + char excl = (RandomNumber<ui32>() % WRITELOCKS) == 0; + if (excl) + Run<TPosixRWExclusiveLocker, 1>(); + else + Run<TPosixRWShareLocker, 0>(); + } + return NULL; +} +#endif 
+ +#ifdef RWSPINLOCK +static void* fast_thread_start(__attribute__((unused)) void* arg) { + for (unsigned q = 0; q < LOOPCOUNT; ++q) { + char excl = (RandomNumber<ui32>() % WRITELOCKS) == 0; + if (excl) + Run<TWriteSpinLockGuard, 1>(); + else + Run<TReadSpinLockGuard, 0>(); + } + return NULL; +} +#endif + +int main() { + pthread_t threads[THREADCOUNT]; + + for (unsigned q = 0; q < THREADCOUNT; ++q) { + pthread_create(&(threads[q]), NULL, &fast_thread_start, NULL); + } + + for (unsigned q = 0; q < THREADCOUNT; ++q) + pthread_join(threads[q], NULL); + + return 0; +} + +#else // !_linux_ + +int main() { + return 0; +} + +#endif diff --git a/library/cpp/threading/light_rw_lock/bench/ya.make b/library/cpp/threading/light_rw_lock/bench/ya.make index 7969b52a50..ed89e3a9b0 100644 --- a/library/cpp/threading/light_rw_lock/bench/ya.make +++ b/library/cpp/threading/light_rw_lock/bench/ya.make @@ -1,13 +1,13 @@ -PROGRAM(lightrwlock_test) - -OWNER(agri) - -SRCS( - lightrwlock_test.cpp -) - -PEERDIR( +PROGRAM(lightrwlock_test) + +OWNER(agri) + +SRCS( + lightrwlock_test.cpp +) + +PEERDIR( library/cpp/threading/light_rw_lock -) - -END() +) + +END() diff --git a/library/cpp/threading/light_rw_lock/lightrwlock.cpp b/library/cpp/threading/light_rw_lock/lightrwlock.cpp index fbb63fd47f..58456907d2 100644 --- a/library/cpp/threading/light_rw_lock/lightrwlock.cpp +++ b/library/cpp/threading/light_rw_lock/lightrwlock.cpp @@ -1,113 +1,113 @@ -#include "lightrwlock.h" -#include <util/system/spinlock.h> - -#if defined(_linux_) - -using namespace NS_LightRWLock; - -void TLightRWLock::WaitForUntrappedShared() { - for (;;) { - for (ui32 i = 0; i < SpinCount_; ++i) { - SpinLockPause(); - - if ((AtomicLoad(Counter_) & 0x7FFFFFFF) == 0) - return; - } - - SequenceStore(UnshareFutex_, 1); - if ((AtomicLoad(Counter_) & 0x7FFFFFFF) == 0) { - AtomicStore(UnshareFutex_, 0); - return; - } - FutexWait(UnshareFutex_, 1); - } -} - -void TLightRWLock::WaitForExclusiveAndUntrappedShared() { - for (;;) { - 
for (ui32 i = 0; i < SpinCount_; ++i) { - SpinLockPause(); - - if (AtomicLoad(Counter_) >= 0) - goto try_to_get_lock; - if (AtomicLoad(TrappedFutex_) == 1) - goto skip_store_trapped; - } - - SequenceStore(TrappedFutex_, 1); - skip_store_trapped: - - if (AtomicLoad(Counter_) < 0) { - FutexWait(TrappedFutex_, 1); - } - - try_to_get_lock: - if (!AtomicSetBit(Counter_, 31)) - break; - } - - for (ui32 j = 0;; ++j) { - for (ui32 i = 0; i < SpinCount_; ++i) { - if ((AtomicLoad(Counter_) & 0x7FFFFFFF) == 0) - return; - - SpinLockPause(); - } - - SequenceStore(UnshareFutex_, 1); - - if ((AtomicLoad(Counter_) & 0x7FFFFFFF) == 0) { - AtomicStore(UnshareFutex_, 0); - return; - } - - FutexWait(UnshareFutex_, 1); - } -} - -void TLightRWLock::WaitForUntrappedAndAcquireRead() { - if (AtomicFetchAdd(Counter_, -1) < 0) - goto skip_lock_try; - - for (;;) { - again: - if (Y_UNLIKELY(AtomicFetchAdd(Counter_, 1) >= 0)) { - return; - } else { - if (AtomicFetchAdd(Counter_, -1) >= 0) - goto again; - } - - skip_lock_try: +#include "lightrwlock.h" +#include <util/system/spinlock.h> + +#if defined(_linux_) + +using namespace NS_LightRWLock; + +void TLightRWLock::WaitForUntrappedShared() { + for (;;) { + for (ui32 i = 0; i < SpinCount_; ++i) { + SpinLockPause(); + + if ((AtomicLoad(Counter_) & 0x7FFFFFFF) == 0) + return; + } + + SequenceStore(UnshareFutex_, 1); + if ((AtomicLoad(Counter_) & 0x7FFFFFFF) == 0) { + AtomicStore(UnshareFutex_, 0); + return; + } + FutexWait(UnshareFutex_, 1); + } +} + +void TLightRWLock::WaitForExclusiveAndUntrappedShared() { + for (;;) { + for (ui32 i = 0; i < SpinCount_; ++i) { + SpinLockPause(); + + if (AtomicLoad(Counter_) >= 0) + goto try_to_get_lock; + if (AtomicLoad(TrappedFutex_) == 1) + goto skip_store_trapped; + } + + SequenceStore(TrappedFutex_, 1); + skip_store_trapped: + + if (AtomicLoad(Counter_) < 0) { + FutexWait(TrappedFutex_, 1); + } + + try_to_get_lock: + if (!AtomicSetBit(Counter_, 31)) + break; + } + + for (ui32 j = 0;; ++j) { + for (ui32 i = 
0; i < SpinCount_; ++i) { + if ((AtomicLoad(Counter_) & 0x7FFFFFFF) == 0) + return; + + SpinLockPause(); + } + + SequenceStore(UnshareFutex_, 1); + + if ((AtomicLoad(Counter_) & 0x7FFFFFFF) == 0) { + AtomicStore(UnshareFutex_, 0); + return; + } + + FutexWait(UnshareFutex_, 1); + } +} + +void TLightRWLock::WaitForUntrappedAndAcquireRead() { + if (AtomicFetchAdd(Counter_, -1) < 0) + goto skip_lock_try; + + for (;;) { + again: + if (Y_UNLIKELY(AtomicFetchAdd(Counter_, 1) >= 0)) { + return; + } else { + if (AtomicFetchAdd(Counter_, -1) >= 0) + goto again; + } + + skip_lock_try: if (AtomicLoad(UnshareFutex_) && (AtomicLoad(Counter_) & 0x7FFFFFFF) == 0) { - SequenceStore(UnshareFutex_, 0); - FutexWake(UnshareFutex_, 1); - } - - for (;;) { - for (ui32 i = 0; i < SpinCount_; ++i) { - SpinLockPause(); - - if (AtomicLoad(Counter_) >= 0) - goto again; - if (AtomicLoad(TrappedFutex_) == 1) - goto skip_store_trapped; - } - - SequenceStore(TrappedFutex_, 1); - skip_store_trapped: - - if (AtomicLoad(Counter_) < 0) { - FutexWait(TrappedFutex_, 1); - if (AtomicLoad(Counter_) < 0) - goto again; - } else if (AtomicLoad(TrappedFutex_)) { - SequenceStore(TrappedFutex_, 0); - FutexWake(TrappedFutex_, 0x7fffffff); - } - break; - } - } -} - -#endif // _linux_ + SequenceStore(UnshareFutex_, 0); + FutexWake(UnshareFutex_, 1); + } + + for (;;) { + for (ui32 i = 0; i < SpinCount_; ++i) { + SpinLockPause(); + + if (AtomicLoad(Counter_) >= 0) + goto again; + if (AtomicLoad(TrappedFutex_) == 1) + goto skip_store_trapped; + } + + SequenceStore(TrappedFutex_, 1); + skip_store_trapped: + + if (AtomicLoad(Counter_) < 0) { + FutexWait(TrappedFutex_, 1); + if (AtomicLoad(Counter_) < 0) + goto again; + } else if (AtomicLoad(TrappedFutex_)) { + SequenceStore(TrappedFutex_, 0); + FutexWake(TrappedFutex_, 0x7fffffff); + } + break; + } + } +} + +#endif // _linux_ diff --git a/library/cpp/threading/light_rw_lock/lightrwlock.h b/library/cpp/threading/light_rw_lock/lightrwlock.h index 931a1817bc..4411787169 
100644 --- a/library/cpp/threading/light_rw_lock/lightrwlock.h +++ b/library/cpp/threading/light_rw_lock/lightrwlock.h @@ -1,45 +1,45 @@ -#pragma once +#pragma once -#include <util/system/rwlock.h> +#include <util/system/rwlock.h> #include <util/system/sanitizers.h> - -#if defined(_linux_) -/* TLightRWLock is optimized for read lock and very fast lock/unlock switching. - Read lock increments counter. - Write lock sets highest bit of counter (makes counter negative). - - Whenever a thread tries to acquire read lock that thread increments - the counter. If the thread gets negative value of the counter right just - after the increment that means write lock was acquired in another thread. - In that case the thread decrements the counter back, wakes one thread on - UnshareFutex, waits on the TrappedFutex and then tries acquire read lock - from the beginning. - If the thread gets positive value of the counter after the increment - then read lock was successfully acquired and - the thread can proceed execution. - - Whenever a thread tries to acquire write lock that thread set the highest bit - of the counter. If the thread determine that the bit was set previously then - write lock was acquired in another thread. In that case the thread waits on - the TrappedFutex and then tries again from the beginning. - If the highest bit was successfully set then thread check if any read lock - exists at the moment. If so the thread waits on UnshareFutex. If there is - no more read locks then write lock was successfully acquired and the thread - can proceed execution. -*/ - -#include <linux/futex.h> + +#if defined(_linux_) +/* TLightRWLock is optimized for read lock and very fast lock/unlock switching. + Read lock increments counter. + Write lock sets highest bit of counter (makes counter negative). + + Whenever a thread tries to acquire read lock that thread increments + the counter. 
If the thread gets negative value of the counter right just + after the increment that means write lock was acquired in another thread. + In that case the thread decrements the counter back, wakes one thread on + UnshareFutex, waits on the TrappedFutex and then tries acquire read lock + from the beginning. + If the thread gets positive value of the counter after the increment + then read lock was successfully acquired and + the thread can proceed execution. + + Whenever a thread tries to acquire write lock that thread set the highest bit + of the counter. If the thread determine that the bit was set previously then + write lock was acquired in another thread. In that case the thread waits on + the TrappedFutex and then tries again from the beginning. + If the highest bit was successfully set then thread check if any read lock + exists at the moment. If so the thread waits on UnshareFutex. If there is + no more read locks then write lock was successfully acquired and the thread + can proceed execution. +*/ + +#include <linux/futex.h> #include <unistd.h> -#include <sys/syscall.h> -#include <errno.h> - -namespace NS_LightRWLock { +#include <sys/syscall.h> +#include <errno.h> + +namespace NS_LightRWLock { static int Y_FORCE_INLINE AtomicFetchAdd(volatile int& item, int value) { return __atomic_fetch_add(&item, value, __ATOMIC_SEQ_CST); } - -#if defined(_x86_64_) || defined(_i386_) - + +#if defined(_x86_64_) || defined(_i386_) + static char Y_FORCE_INLINE AtomicSetBit(volatile int& item, unsigned bit) { char ret; __asm__ __volatile__( @@ -54,7 +54,7 @@ namespace NS_LightRWLock { return ret; } - + static char Y_FORCE_INLINE AtomicClearBit(volatile int& item, unsigned bit) { char ret; __asm__ __volatile__( @@ -69,22 +69,22 @@ namespace NS_LightRWLock { return ret; } - - -#else - + + +#else + static char Y_FORCE_INLINE AtomicSetBit(volatile int& item, unsigned bit) { int prev = __atomic_fetch_or(&item, 1 << bit, __ATOMIC_SEQ_CST); return (prev & (1 << bit)) != 0 ? 
1 : 0; } - + static char Y_FORCE_INLINE AtomicClearBit(volatile int& item, unsigned bit) { int prev = __atomic_fetch_and(&item, ~(1 << bit), __ATOMIC_SEQ_CST); return (prev & (1 << bit)) != 0 ? 1 : 0; } #endif - + #if defined(_x86_64_) || defined(_i386_) || defined (__aarch64__) || defined (__powerpc64__) static bool AtomicLockHighByte(volatile int& item) { union TA { @@ -98,23 +98,23 @@ namespace NS_LightRWLock { __ATOMIC_SEQ_CST, __ATOMIC_RELAXED); } -#endif - +#endif + template <typename TInt> static void Y_FORCE_INLINE AtomicStore(volatile TInt& var, TInt value) { __atomic_store_n(&var, value, __ATOMIC_RELEASE); } - + template <typename TInt> static void Y_FORCE_INLINE SequenceStore(volatile TInt& var, TInt value) { __atomic_store_n(&var, value, __ATOMIC_SEQ_CST); } - + template <typename TInt> static TInt Y_FORCE_INLINE AtomicLoad(const volatile TInt& var) { return __atomic_load_n(&var, __ATOMIC_ACQUIRE); } - + static void Y_FORCE_INLINE FutexWait(volatile int& fvar, int value) { for (;;) { int result = @@ -126,9 +126,9 @@ namespace NS_LightRWLock { continue; Y_FAIL("futex error"); } - } - } - + } + } + static void Y_FORCE_INLINE FutexWake(volatile int& fvar, int amount) { const int result = syscall(SYS_futex, &fvar, FUTEX_WAKE_PRIVATE, amount, NULL, NULL, 0); @@ -136,85 +136,85 @@ namespace NS_LightRWLock { Y_FAIL("futex error"); } -} - -class alignas(64) TLightRWLock { -public: - TLightRWLock(ui32 spinCount = 10) - : Counter_(0) - , TrappedFutex_(0) - , UnshareFutex_(0) - , SpinCount_(spinCount) +} + +class alignas(64) TLightRWLock { +public: + TLightRWLock(ui32 spinCount = 10) + : Counter_(0) + , TrappedFutex_(0) + , UnshareFutex_(0) + , SpinCount_(spinCount) { } - - TLightRWLock(const TLightRWLock&) = delete; - void operator=(const TLightRWLock&) = delete; - - Y_FORCE_INLINE void AcquireWrite() { - using namespace NS_LightRWLock; - - if (AtomicLockHighByte(Counter_)) { - if ((AtomicLoad(Counter_) & 0x7FFFFFFF) == 0) - return; - return 
WaitForUntrappedShared(); - } - WaitForExclusiveAndUntrappedShared(); - } - - Y_FORCE_INLINE void AcquireRead() { - using namespace NS_LightRWLock; - - if (Y_LIKELY(AtomicFetchAdd(Counter_, 1) >= 0)) - return; - WaitForUntrappedAndAcquireRead(); - } - - Y_FORCE_INLINE void ReleaseWrite() { - using namespace NS_LightRWLock; - - AtomicClearBit(Counter_, 31); - if (AtomicLoad(TrappedFutex_)) { - SequenceStore(TrappedFutex_, 0); - FutexWake(TrappedFutex_, 0x7fffffff); - } - } - - Y_FORCE_INLINE void ReleaseRead() { - using namespace NS_LightRWLock; - - if (Y_LIKELY(AtomicFetchAdd(Counter_, -1) >= 0)) - return; - if (!AtomicLoad(UnshareFutex_)) - return; - if ((AtomicLoad(Counter_) & 0x7fffffff) == 0) { - SequenceStore(UnshareFutex_, 0); - FutexWake(UnshareFutex_, 1); - } - } - -private: - volatile int Counter_; - volatile int TrappedFutex_; - volatile int UnshareFutex_; - const ui32 SpinCount_; - - void WaitForUntrappedShared(); - void WaitForExclusiveAndUntrappedShared(); - void WaitForUntrappedAndAcquireRead(); -}; - -#else - -class TLightRWLock: public TRWMutex { -public: + + TLightRWLock(const TLightRWLock&) = delete; + void operator=(const TLightRWLock&) = delete; + + Y_FORCE_INLINE void AcquireWrite() { + using namespace NS_LightRWLock; + + if (AtomicLockHighByte(Counter_)) { + if ((AtomicLoad(Counter_) & 0x7FFFFFFF) == 0) + return; + return WaitForUntrappedShared(); + } + WaitForExclusiveAndUntrappedShared(); + } + + Y_FORCE_INLINE void AcquireRead() { + using namespace NS_LightRWLock; + + if (Y_LIKELY(AtomicFetchAdd(Counter_, 1) >= 0)) + return; + WaitForUntrappedAndAcquireRead(); + } + + Y_FORCE_INLINE void ReleaseWrite() { + using namespace NS_LightRWLock; + + AtomicClearBit(Counter_, 31); + if (AtomicLoad(TrappedFutex_)) { + SequenceStore(TrappedFutex_, 0); + FutexWake(TrappedFutex_, 0x7fffffff); + } + } + + Y_FORCE_INLINE void ReleaseRead() { + using namespace NS_LightRWLock; + + if (Y_LIKELY(AtomicFetchAdd(Counter_, -1) >= 0)) + return; + if 
(!AtomicLoad(UnshareFutex_)) + return; + if ((AtomicLoad(Counter_) & 0x7fffffff) == 0) { + SequenceStore(UnshareFutex_, 0); + FutexWake(UnshareFutex_, 1); + } + } + +private: + volatile int Counter_; + volatile int TrappedFutex_; + volatile int UnshareFutex_; + const ui32 SpinCount_; + + void WaitForUntrappedShared(); + void WaitForExclusiveAndUntrappedShared(); + void WaitForUntrappedAndAcquireRead(); +}; + +#else + +class TLightRWLock: public TRWMutex { +public: TLightRWLock() { } TLightRWLock(ui32) { } -}; - -#endif - -using TLightReadGuard = TReadGuardBase<TLightRWLock>; -using TLightWriteGuard = TWriteGuardBase<TLightRWLock>; +}; + +#endif + +using TLightReadGuard = TReadGuardBase<TLightRWLock>; +using TLightWriteGuard = TWriteGuardBase<TLightRWLock>; diff --git a/library/cpp/threading/light_rw_lock/ut/rwlock_ut.cpp b/library/cpp/threading/light_rw_lock/ut/rwlock_ut.cpp index e82063d959..3ed4bf68fa 100644 --- a/library/cpp/threading/light_rw_lock/ut/rwlock_ut.cpp +++ b/library/cpp/threading/light_rw_lock/ut/rwlock_ut.cpp @@ -1,122 +1,122 @@ #include <library/cpp/threading/light_rw_lock/lightrwlock.h> #include <library/cpp/testing/unittest/registar.h> -#include <util/random/random.h> -#include <util/system/atomic.h> +#include <util/random/random.h> +#include <util/system/atomic.h> #include <util/thread/pool.h> - -class TRWMutexTest: public TTestBase { - UNIT_TEST_SUITE(TRWMutexTest); - UNIT_TEST(TestReaders) - UNIT_TEST(TestReadersWriters) - UNIT_TEST_SUITE_END(); - - struct TSharedData { - TSharedData() - : writersIn(0) - , readersIn(0) - , failed(false) - { - } - - TAtomic writersIn; - TAtomic readersIn; - - bool failed; - - TLightRWLock mutex; - }; - - class TThreadTask: public IObjectInQueue { - public: - using PFunc = void (TThreadTask::*)(void); - - TThreadTask(PFunc func, TSharedData& data, size_t id, size_t total) - : Func_(func) - , Data_(data) - , Id_(id) - , Total_(total) - { - } - - void Process(void*) override { - THolder<TThreadTask> This(this); - 
- (this->*Func_)(); - } - -#define FAIL_ASSERT(cond) \ - if (!(cond)) { \ - Data_.failed = true; \ - } - void RunReaders() { - Data_.mutex.AcquireRead(); - - AtomicIncrement(Data_.readersIn); - usleep(100); - FAIL_ASSERT(Data_.readersIn == long(Total_)); - usleep(100); - AtomicDecrement(Data_.readersIn); - - Data_.mutex.ReleaseRead(); - } - - void RunReadersWriters() { - if (Id_ % 2 == 0) { - for (size_t i = 0; i < 10; ++i) { - Data_.mutex.AcquireRead(); - - AtomicIncrement(Data_.readersIn); - FAIL_ASSERT(Data_.writersIn == 0); - usleep(RandomNumber<ui32>() % 5); - AtomicDecrement(Data_.readersIn); - - Data_.mutex.ReleaseRead(); - } - } else { - for (size_t i = 0; i < 10; ++i) { - Data_.mutex.AcquireWrite(); - - AtomicIncrement(Data_.writersIn); - FAIL_ASSERT(Data_.readersIn == 0 && Data_.writersIn == 1); - usleep(RandomNumber<ui32>() % 5); - AtomicDecrement(Data_.writersIn); - - Data_.mutex.ReleaseWrite(); - } - } - } -#undef FAIL_ASSERT - - private: - PFunc Func_; - TSharedData& Data_; - size_t Id_; - size_t Total_; - }; - -private: -#define RUN_CYCLE(what, count) \ - Q_.Start(count); \ - for (size_t i = 0; i < count; ++i) { \ - UNIT_ASSERT(Q_.Add(new TThreadTask(&TThreadTask::what, Data_, i, count))); \ - } \ - Q_.Stop(); \ - bool b = Data_.failed; \ - Data_.failed = false; \ - UNIT_ASSERT(!b); - - void TestReaders() { - RUN_CYCLE(RunReaders, 1); - } - - void TestReadersWriters() { - RUN_CYCLE(RunReadersWriters, 1); - } - -#undef RUN_CYCLE -private: - TSharedData Data_; + +class TRWMutexTest: public TTestBase { + UNIT_TEST_SUITE(TRWMutexTest); + UNIT_TEST(TestReaders) + UNIT_TEST(TestReadersWriters) + UNIT_TEST_SUITE_END(); + + struct TSharedData { + TSharedData() + : writersIn(0) + , readersIn(0) + , failed(false) + { + } + + TAtomic writersIn; + TAtomic readersIn; + + bool failed; + + TLightRWLock mutex; + }; + + class TThreadTask: public IObjectInQueue { + public: + using PFunc = void (TThreadTask::*)(void); + + TThreadTask(PFunc func, TSharedData& data, 
size_t id, size_t total) + : Func_(func) + , Data_(data) + , Id_(id) + , Total_(total) + { + } + + void Process(void*) override { + THolder<TThreadTask> This(this); + + (this->*Func_)(); + } + +#define FAIL_ASSERT(cond) \ + if (!(cond)) { \ + Data_.failed = true; \ + } + void RunReaders() { + Data_.mutex.AcquireRead(); + + AtomicIncrement(Data_.readersIn); + usleep(100); + FAIL_ASSERT(Data_.readersIn == long(Total_)); + usleep(100); + AtomicDecrement(Data_.readersIn); + + Data_.mutex.ReleaseRead(); + } + + void RunReadersWriters() { + if (Id_ % 2 == 0) { + for (size_t i = 0; i < 10; ++i) { + Data_.mutex.AcquireRead(); + + AtomicIncrement(Data_.readersIn); + FAIL_ASSERT(Data_.writersIn == 0); + usleep(RandomNumber<ui32>() % 5); + AtomicDecrement(Data_.readersIn); + + Data_.mutex.ReleaseRead(); + } + } else { + for (size_t i = 0; i < 10; ++i) { + Data_.mutex.AcquireWrite(); + + AtomicIncrement(Data_.writersIn); + FAIL_ASSERT(Data_.readersIn == 0 && Data_.writersIn == 1); + usleep(RandomNumber<ui32>() % 5); + AtomicDecrement(Data_.writersIn); + + Data_.mutex.ReleaseWrite(); + } + } + } +#undef FAIL_ASSERT + + private: + PFunc Func_; + TSharedData& Data_; + size_t Id_; + size_t Total_; + }; + +private: +#define RUN_CYCLE(what, count) \ + Q_.Start(count); \ + for (size_t i = 0; i < count; ++i) { \ + UNIT_ASSERT(Q_.Add(new TThreadTask(&TThreadTask::what, Data_, i, count))); \ + } \ + Q_.Stop(); \ + bool b = Data_.failed; \ + Data_.failed = false; \ + UNIT_ASSERT(!b); + + void TestReaders() { + RUN_CYCLE(RunReaders, 1); + } + + void TestReadersWriters() { + RUN_CYCLE(RunReadersWriters, 1); + } + +#undef RUN_CYCLE +private: + TSharedData Data_; TThreadPool Q_; -}; - -UNIT_TEST_SUITE_REGISTRATION(TRWMutexTest) +}; + +UNIT_TEST_SUITE_REGISTRATION(TRWMutexTest) diff --git a/library/cpp/threading/light_rw_lock/ut/ya.make b/library/cpp/threading/light_rw_lock/ut/ya.make index 92928b837c..9b1a54d7ec 100644 --- a/library/cpp/threading/light_rw_lock/ut/ya.make +++ 
b/library/cpp/threading/light_rw_lock/ut/ya.make @@ -1,9 +1,9 @@ UNITTEST_FOR(library/cpp/threading/light_rw_lock) - + OWNER(agri) - -SRCS( - rwlock_ut.cpp -) - -END() + +SRCS( + rwlock_ut.cpp +) + +END() diff --git a/library/cpp/threading/light_rw_lock/ya.make b/library/cpp/threading/light_rw_lock/ya.make index a196fb8588..e86fd42223 100644 --- a/library/cpp/threading/light_rw_lock/ya.make +++ b/library/cpp/threading/light_rw_lock/ya.make @@ -1,10 +1,10 @@ -LIBRARY() - -OWNER(agri) - -SRCS( - lightrwlock.cpp - lightrwlock.h -) - -END() +LIBRARY() + +OWNER(agri) + +SRCS( + lightrwlock.cpp + lightrwlock.h +) + +END() diff --git a/library/cpp/threading/queue/basic_ut.cpp b/library/cpp/threading/queue/basic_ut.cpp index 5f56f8583e..2db5d6e8e8 100644 --- a/library/cpp/threading/queue/basic_ut.cpp +++ b/library/cpp/threading/queue/basic_ut.cpp @@ -1,92 +1,92 @@ #include <library/cpp/testing/unittest/registar.h> -#include <util/generic/vector.h> -#include <util/system/thread.h> - -#include "ut_helpers.h" - +#include <util/generic/vector.h> +#include <util/system/thread.h> + +#include "ut_helpers.h" + template <typename TQueueType> -class TQueueTestsInSingleThread: public TTestBase { -private: +class TQueueTestsInSingleThread: public TTestBase { +private: using TSelf = TQueueTestsInSingleThread<TQueueType>; - using TLink = TIntrusiveLink; - - UNIT_TEST_SUITE_DEMANGLE(TSelf); - UNIT_TEST(OnePushOnePop) - UNIT_TEST(OnePushOnePop_Repeat1M) - UNIT_TEST(Threads8_Repeat1M_Push1Pop1) - UNIT_TEST_SUITE_END(); - -public: - void OnePushOnePop() { + using TLink = TIntrusiveLink; + + UNIT_TEST_SUITE_DEMANGLE(TSelf); + UNIT_TEST(OnePushOnePop) + UNIT_TEST(OnePushOnePop_Repeat1M) + UNIT_TEST(Threads8_Repeat1M_Push1Pop1) + UNIT_TEST_SUITE_END(); + +public: + void OnePushOnePop() { TQueueType queue; - - auto popped = queue.Pop(); - UNIT_ASSERT_VALUES_EQUAL(popped, nullptr); - - TLink msg; - queue.Push(&msg); - popped = queue.Pop(); - UNIT_ASSERT_VALUES_EQUAL(&msg, popped); - - popped = 
queue.Pop(); - UNIT_ASSERT_VALUES_EQUAL(popped, nullptr); - }; - - void OnePushOnePop_Repeat1M() { + + auto popped = queue.Pop(); + UNIT_ASSERT_VALUES_EQUAL(popped, nullptr); + + TLink msg; + queue.Push(&msg); + popped = queue.Pop(); + UNIT_ASSERT_VALUES_EQUAL(&msg, popped); + + popped = queue.Pop(); + UNIT_ASSERT_VALUES_EQUAL(popped, nullptr); + }; + + void OnePushOnePop_Repeat1M() { TQueueType queue; - TLink msg; - - auto popped = queue.Pop(); - UNIT_ASSERT_VALUES_EQUAL(popped, nullptr); - - for (int i = 0; i < 1000000; ++i) { - queue.Push(&msg); - popped = queue.Pop(); - UNIT_ASSERT_VALUES_EQUAL(&msg, popped); - - popped = queue.Pop(); - UNIT_ASSERT_VALUES_EQUAL(popped, nullptr); - } - } - - template <size_t NUMBER_OF_THREADS> - void RepeatPush1Pop1_InManyThreads() { + TLink msg; + + auto popped = queue.Pop(); + UNIT_ASSERT_VALUES_EQUAL(popped, nullptr); + + for (int i = 0; i < 1000000; ++i) { + queue.Push(&msg); + popped = queue.Pop(); + UNIT_ASSERT_VALUES_EQUAL(&msg, popped); + + popped = queue.Pop(); + UNIT_ASSERT_VALUES_EQUAL(popped, nullptr); + } + } + + template <size_t NUMBER_OF_THREADS> + void RepeatPush1Pop1_InManyThreads() { class TCycleThread: public ISimpleThread { - public: - void* ThreadProc() override { + public: + void* ThreadProc() override { TQueueType queue; - TLink msg; - auto popped = queue.Pop(); - UNIT_ASSERT_VALUES_EQUAL(popped, nullptr); - - for (size_t i = 0; i < 1000000; ++i) { - queue.Push(&msg); - popped = queue.Pop(); - UNIT_ASSERT_VALUES_EQUAL(popped, &msg); - - popped = queue.Pop(); - UNIT_ASSERT_VALUES_EQUAL(popped, nullptr); - } - return nullptr; - } - }; - + TLink msg; + auto popped = queue.Pop(); + UNIT_ASSERT_VALUES_EQUAL(popped, nullptr); + + for (size_t i = 0; i < 1000000; ++i) { + queue.Push(&msg); + popped = queue.Pop(); + UNIT_ASSERT_VALUES_EQUAL(popped, &msg); + + popped = queue.Pop(); + UNIT_ASSERT_VALUES_EQUAL(popped, nullptr); + } + return nullptr; + } + }; + TVector<TAutoPtr<TCycleThread>> cyclers; - - for (size_t i 
= 0; i < NUMBER_OF_THREADS; ++i) { - cyclers.emplace_back(new TCycleThread); - cyclers.back()->Start(); - } - - for (size_t i = 0; i < NUMBER_OF_THREADS; ++i) { - cyclers[i]->Join(); - } - } - - void Threads8_Repeat1M_Push1Pop1() { - RepeatPush1Pop1_InManyThreads<8>(); - } -}; - -REGISTER_TESTS_FOR_ALL_ORDERED_QUEUES(TQueueTestsInSingleThread); -REGISTER_TESTS_FOR_ALL_UNORDERED_QUEUES(TQueueTestsInSingleThread) + + for (size_t i = 0; i < NUMBER_OF_THREADS; ++i) { + cyclers.emplace_back(new TCycleThread); + cyclers.back()->Start(); + } + + for (size_t i = 0; i < NUMBER_OF_THREADS; ++i) { + cyclers[i]->Join(); + } + } + + void Threads8_Repeat1M_Push1Pop1() { + RepeatPush1Pop1_InManyThreads<8>(); + } +}; + +REGISTER_TESTS_FOR_ALL_ORDERED_QUEUES(TQueueTestsInSingleThread); +REGISTER_TESTS_FOR_ALL_UNORDERED_QUEUES(TQueueTestsInSingleThread) diff --git a/library/cpp/threading/queue/mpmc_unordered_ring.cpp b/library/cpp/threading/queue/mpmc_unordered_ring.cpp index 160547f594..df48182210 100644 --- a/library/cpp/threading/queue/mpmc_unordered_ring.cpp +++ b/library/cpp/threading/queue/mpmc_unordered_ring.cpp @@ -1,74 +1,74 @@ -#include "mpmc_unordered_ring.h" - -namespace NThreading { - TMPMCUnorderedRing::TMPMCUnorderedRing(size_t size) { - Y_VERIFY(size > 0); - RingSize = size; - RingBuffer.Reset(new void*[size]); - memset(&RingBuffer[0], 0, sizeof(void*) * size); - } - - bool TMPMCUnorderedRing::Push(void* msg, ui16 retryCount) noexcept { - if (retryCount == 0) { - StubbornPush(msg); - return true; - } - for (ui16 itry = retryCount; itry-- > 0;) { - if (WeakPush(msg)) { - return true; - } - } - return false; - } - - bool TMPMCUnorderedRing::WeakPush(void* msg) noexcept { - auto pawl = AtomicIncrement(WritePawl); - if (pawl - AtomicGet(ReadFront) >= RingSize) { - // Queue is full - AtomicDecrement(WritePawl); - return false; - } - - auto writeSlot = AtomicGetAndIncrement(WriteFront); - if (AtomicCas(&RingBuffer[writeSlot % RingSize], msg, nullptr)) { - return true; - } 
- // slot is occupied for some reason, retry - return false; - } - - void* TMPMCUnorderedRing::Pop() noexcept { - ui64 readSlot; - - for (ui16 itry = MAX_POP_TRIES; itry-- > 0;) { - auto pawl = AtomicIncrement(ReadPawl); - if (pawl > AtomicGet(WriteFront)) { - // Queue is empty - AtomicDecrement(ReadPawl); - return nullptr; - } - - readSlot = AtomicGetAndIncrement(ReadFront); - - auto msg = AtomicSwap(&RingBuffer[readSlot % RingSize], nullptr); - if (msg != nullptr) { - return msg; - } - } - - /* got no message in the slot, let's try to rollback readfront */ - AtomicCas(&ReadFront, readSlot - 1, readSlot); - return nullptr; - } - - void* TMPMCUnorderedRing::UnsafeScanningPop(ui64* last) noexcept { - for (; *last < RingSize;) { - auto msg = AtomicSwap(&RingBuffer[*last], nullptr); - ++*last; - if (msg != nullptr) { - return msg; - } - } - return nullptr; - } -} +#include "mpmc_unordered_ring.h" + +namespace NThreading { + TMPMCUnorderedRing::TMPMCUnorderedRing(size_t size) { + Y_VERIFY(size > 0); + RingSize = size; + RingBuffer.Reset(new void*[size]); + memset(&RingBuffer[0], 0, sizeof(void*) * size); + } + + bool TMPMCUnorderedRing::Push(void* msg, ui16 retryCount) noexcept { + if (retryCount == 0) { + StubbornPush(msg); + return true; + } + for (ui16 itry = retryCount; itry-- > 0;) { + if (WeakPush(msg)) { + return true; + } + } + return false; + } + + bool TMPMCUnorderedRing::WeakPush(void* msg) noexcept { + auto pawl = AtomicIncrement(WritePawl); + if (pawl - AtomicGet(ReadFront) >= RingSize) { + // Queue is full + AtomicDecrement(WritePawl); + return false; + } + + auto writeSlot = AtomicGetAndIncrement(WriteFront); + if (AtomicCas(&RingBuffer[writeSlot % RingSize], msg, nullptr)) { + return true; + } + // slot is occupied for some reason, retry + return false; + } + + void* TMPMCUnorderedRing::Pop() noexcept { + ui64 readSlot; + + for (ui16 itry = MAX_POP_TRIES; itry-- > 0;) { + auto pawl = AtomicIncrement(ReadPawl); + if (pawl > AtomicGet(WriteFront)) { + // 
Queue is empty + AtomicDecrement(ReadPawl); + return nullptr; + } + + readSlot = AtomicGetAndIncrement(ReadFront); + + auto msg = AtomicSwap(&RingBuffer[readSlot % RingSize], nullptr); + if (msg != nullptr) { + return msg; + } + } + + /* got no message in the slot, let's try to rollback readfront */ + AtomicCas(&ReadFront, readSlot - 1, readSlot); + return nullptr; + } + + void* TMPMCUnorderedRing::UnsafeScanningPop(ui64* last) noexcept { + for (; *last < RingSize;) { + auto msg = AtomicSwap(&RingBuffer[*last], nullptr); + ++*last; + if (msg != nullptr) { + return msg; + } + } + return nullptr; + } +} diff --git a/library/cpp/threading/queue/mpmc_unordered_ring.h b/library/cpp/threading/queue/mpmc_unordered_ring.h index 5042f7528e..59758d2c35 100644 --- a/library/cpp/threading/queue/mpmc_unordered_ring.h +++ b/library/cpp/threading/queue/mpmc_unordered_ring.h @@ -1,42 +1,42 @@ -#pragma once - -/* - It's not a general purpose queue. - No order guarantee, but it mostly ordered. - Items may stuck in almost empty queue. - Use UnsafeScanningPop to pop all stuck items. - Almost wait-free for producers and consumers. - */ - -#include <util/system/atomic.h> -#include <util/generic/ptr.h> - -namespace NThreading { - struct TMPMCUnorderedRing { - public: - static constexpr ui16 MAX_PUSH_TRIES = 4; - static constexpr ui16 MAX_POP_TRIES = 4; - - TMPMCUnorderedRing(size_t size); - - bool Push(void* msg, ui16 retryCount = MAX_PUSH_TRIES) noexcept; - void StubbornPush(void* msg) { - while (!WeakPush(msg)) { - } - } - - void* Pop() noexcept; - - void* UnsafeScanningPop(ui64* last) noexcept; - - private: - bool WeakPush(void* msg) noexcept; - - size_t RingSize; - TArrayPtr<void*> RingBuffer; - ui64 WritePawl = 0; - ui64 WriteFront = 0; - ui64 ReadPawl = 0; - ui64 ReadFront = 0; - }; -} +#pragma once + +/* + It's not a general purpose queue. + No order guarantee, but it mostly ordered. + Items may stuck in almost empty queue. + Use UnsafeScanningPop to pop all stuck items. 
+ Almost wait-free for producers and consumers. + */ + +#include <util/system/atomic.h> +#include <util/generic/ptr.h> + +namespace NThreading { + struct TMPMCUnorderedRing { + public: + static constexpr ui16 MAX_PUSH_TRIES = 4; + static constexpr ui16 MAX_POP_TRIES = 4; + + TMPMCUnorderedRing(size_t size); + + bool Push(void* msg, ui16 retryCount = MAX_PUSH_TRIES) noexcept; + void StubbornPush(void* msg) { + while (!WeakPush(msg)) { + } + } + + void* Pop() noexcept; + + void* UnsafeScanningPop(ui64* last) noexcept; + + private: + bool WeakPush(void* msg) noexcept; + + size_t RingSize; + TArrayPtr<void*> RingBuffer; + ui64 WritePawl = 0; + ui64 WriteFront = 0; + ui64 ReadPawl = 0; + ui64 ReadFront = 0; + }; +} diff --git a/library/cpp/threading/queue/mpsc_htswap.cpp b/library/cpp/threading/queue/mpsc_htswap.cpp index 610c8f67f1..d8ab0d4f48 100644 --- a/library/cpp/threading/queue/mpsc_htswap.cpp +++ b/library/cpp/threading/queue/mpsc_htswap.cpp @@ -1 +1 @@ -#include "mpsc_htswap.h" +#include "mpsc_htswap.h" diff --git a/library/cpp/threading/queue/mpsc_htswap.h b/library/cpp/threading/queue/mpsc_htswap.h index c42caa7ac0..2d0bfd1146 100644 --- a/library/cpp/threading/queue/mpsc_htswap.h +++ b/library/cpp/threading/queue/mpsc_htswap.h @@ -1,132 +1,132 @@ -#pragma once - -/* - http://www.1024cores.net/home/lock-free-algorithms/queues/non-intrusive-mpsc-node-based-queue - - Simple semi-wait-free queue. Many producers - one consumer. - Tracking of allocated memory is not required. - No CAS. Only atomic swap (exchange) operations. - - WARNING: a sleeping producer can stop progress for consumer. - - WARNING: there is no wait¬ify mechanic for consumer, - consumer receives nullptr if queue was empty. 
- - WARNING: the algorithm itself is lock-free - but producers and consumer could be blocked by memory allocator - - Reference design: rtmapreduce/libs/threading/lfqueue.h - */ - -#include <util/generic/noncopyable.h> -#include <util/system/types.h> -#include <util/system/atomic.h> - -#include "tune.h" - -namespace NThreading { - namespace NHTSwapPrivate { - template <typename T, typename TTuneup> - struct TNode +#pragma once + +/* + http://www.1024cores.net/home/lock-free-algorithms/queues/non-intrusive-mpsc-node-based-queue + + Simple semi-wait-free queue. Many producers - one consumer. + Tracking of allocated memory is not required. + No CAS. Only atomic swap (exchange) operations. + + WARNING: a sleeping producer can stop progress for consumer. + + WARNING: there is no wait¬ify mechanic for consumer, + consumer receives nullptr if queue was empty. + + WARNING: the algorithm itself is lock-free + but producers and consumer could be blocked by memory allocator + + Reference design: rtmapreduce/libs/threading/lfqueue.h + */ + +#include <util/generic/noncopyable.h> +#include <util/system/types.h> +#include <util/system/atomic.h> + +#include "tune.h" + +namespace NThreading { + namespace NHTSwapPrivate { + template <typename T, typename TTuneup> + struct TNode : public TTuneup::TNodeBase, public TTuneup::template TNodeLayout<TNode<T, TTuneup>, T> { - TNode(const T& item) { - this->Next = nullptr; - this->Item = item; - } - - TNode(T&& item) { - this->Next = nullptr; - this->Item = std::move(item); - } - }; - - struct TDefaultTuneup { - struct TNodeBase: private TNonCopyable { - }; - - template <typename TNode, typename T> - struct TNodeLayout { - TNode* Next; - T Item; - }; - - template <typename TNode> - struct TQueueLayout { - TNode* Head; - TNode* Tail; - }; - }; - - template <typename T, typename TTuneup> - class THTSwapQueueImpl + TNode(const T& item) { + this->Next = nullptr; + this->Item = item; + } + + TNode(T&& item) { + this->Next = nullptr; + this->Item = 
std::move(item); + } + }; + + struct TDefaultTuneup { + struct TNodeBase: private TNonCopyable { + }; + + template <typename TNode, typename T> + struct TNodeLayout { + TNode* Next; + T Item; + }; + + template <typename TNode> + struct TQueueLayout { + TNode* Head; + TNode* Tail; + }; + }; + + template <typename T, typename TTuneup> + class THTSwapQueueImpl : protected TTuneup::template TQueueLayout<TNode<T, TTuneup>> { - protected: - using TTunedNode = TNode<T, TTuneup>; - - public: - using TItem = T; - - THTSwapQueueImpl() { - this->Head = new TTunedNode(T()); - this->Tail = this->Head; - } - - ~THTSwapQueueImpl() { - TTunedNode* node = this->Head; - while (node != nullptr) { - TTunedNode* next = node->Next; - delete node; - node = next; - } - } - - template <typename TT> - void Push(TT&& item) { - Enqueue(new TTunedNode(std::forward<TT>(item))); - } - - T Peek() { - TTunedNode* next = AtomicGet(this->Head->Next); - if (next == nullptr) { - return T(); - } - return next->Item; - } - - void Enqueue(TTunedNode* node) { - // our goal is to avoid expensive CAS here, - // but now consumer will be blocked until new tail linked. - // fortunately 'window of inconsistency' is extremely small. 
- TTunedNode* prev = AtomicSwap(&this->Tail, node); - AtomicSet(prev->Next, node); - } - - T Pop() { - TTunedNode* next = AtomicGet(this->Head->Next); - if (next == nullptr) { - return nullptr; - } - auto item = std::move(next->Item); - std::swap(this->Head, next); // no need atomic here - delete next; - return item; - } - - bool IsEmpty() const { - TTunedNode* next = AtomicGet(this->Head->Next); - return (next == nullptr); - } - }; - } - - DeclareTuneTypeParam(THTSwapNodeBase, TNodeBase); - DeclareTuneTypeParam(THTSwapNodeLayout, TNodeLayout); - DeclareTuneTypeParam(THTSwapQueueLayout, TQueueLayout); - + protected: + using TTunedNode = TNode<T, TTuneup>; + + public: + using TItem = T; + + THTSwapQueueImpl() { + this->Head = new TTunedNode(T()); + this->Tail = this->Head; + } + + ~THTSwapQueueImpl() { + TTunedNode* node = this->Head; + while (node != nullptr) { + TTunedNode* next = node->Next; + delete node; + node = next; + } + } + + template <typename TT> + void Push(TT&& item) { + Enqueue(new TTunedNode(std::forward<TT>(item))); + } + + T Peek() { + TTunedNode* next = AtomicGet(this->Head->Next); + if (next == nullptr) { + return T(); + } + return next->Item; + } + + void Enqueue(TTunedNode* node) { + // our goal is to avoid expensive CAS here, + // but now consumer will be blocked until new tail linked. + // fortunately 'window of inconsistency' is extremely small. 
+ TTunedNode* prev = AtomicSwap(&this->Tail, node); + AtomicSet(prev->Next, node); + } + + T Pop() { + TTunedNode* next = AtomicGet(this->Head->Next); + if (next == nullptr) { + return nullptr; + } + auto item = std::move(next->Item); + std::swap(this->Head, next); // no need atomic here + delete next; + return item; + } + + bool IsEmpty() const { + TTunedNode* next = AtomicGet(this->Head->Next); + return (next == nullptr); + } + }; + } + + DeclareTuneTypeParam(THTSwapNodeBase, TNodeBase); + DeclareTuneTypeParam(THTSwapNodeLayout, TNodeLayout); + DeclareTuneTypeParam(THTSwapQueueLayout, TQueueLayout); + template <typename T = void*, typename... TParams> - class THTSwapQueue + class THTSwapQueue : public NHTSwapPrivate::THTSwapQueueImpl<T, TTune<NHTSwapPrivate::TDefaultTuneup, TParams...>> { - }; -} + }; +} diff --git a/library/cpp/threading/queue/mpsc_intrusive_unordered.cpp b/library/cpp/threading/queue/mpsc_intrusive_unordered.cpp index 3bb1a04f7e..a6a2fcef39 100644 --- a/library/cpp/threading/queue/mpsc_intrusive_unordered.cpp +++ b/library/cpp/threading/queue/mpsc_intrusive_unordered.cpp @@ -1,79 +1,79 @@ -#include "mpsc_intrusive_unordered.h" -#include <util/system/atomic.h> - -namespace NThreading { - void TMPSCIntrusiveUnordered::Push(TIntrusiveNode* node) noexcept { - auto head = AtomicGet(HeadForCaS); - for (ui32 i = NUMBER_OF_TRIES_FOR_CAS; i-- > 0;) { - // no ABA here, because Next is exactly head - // it does not matter how many travels head was made/ - node->Next = head; - auto prev = AtomicGetAndCas(&HeadForCaS, node, head); - if (head == prev) { - return; - } - head = prev; - } - // boring of trying to do cas, let's just swap - - // no need for atomic here, because the next is atomic swap - node->Next = 0; - - head = AtomicSwap(&HeadForSwap, node); - if (head != nullptr) { - AtomicSet(node->Next, head); - } else { - // consumer must know if no other thread may access the memory, - // setting Next to node is a way to notify consumer - 
AtomicSet(node->Next, node); - } - } - - TIntrusiveNode* TMPSCIntrusiveUnordered::PopMany() noexcept { - if (NotReadyChain == nullptr) { - auto head = AtomicSwap(&HeadForSwap, nullptr); - NotReadyChain = head; - } - - if (NotReadyChain != nullptr) { - auto next = AtomicGet(NotReadyChain->Next); - if (next != nullptr) { - auto ready = NotReadyChain; - TIntrusiveNode* cut; - do { - cut = NotReadyChain; - NotReadyChain = next; - next = AtomicGet(NotReadyChain->Next); - if (next == NotReadyChain) { - cut = NotReadyChain; - NotReadyChain = nullptr; - break; - } - } while (next != nullptr); - cut->Next = nullptr; - return ready; - } - } - - if (AtomicGet(HeadForCaS) != nullptr) { - return AtomicSwap(&HeadForCaS, nullptr); - } - return nullptr; - } - - TIntrusiveNode* TMPSCIntrusiveUnordered::Pop() noexcept { - if (PopOneQueue != nullptr) { - auto head = PopOneQueue; - PopOneQueue = PopOneQueue->Next; - return head; - } - - PopOneQueue = PopMany(); - if (PopOneQueue != nullptr) { - auto head = PopOneQueue; - PopOneQueue = PopOneQueue->Next; - return head; - } - return nullptr; - } -} +#include "mpsc_intrusive_unordered.h" +#include <util/system/atomic.h> + +namespace NThreading { + void TMPSCIntrusiveUnordered::Push(TIntrusiveNode* node) noexcept { + auto head = AtomicGet(HeadForCaS); + for (ui32 i = NUMBER_OF_TRIES_FOR_CAS; i-- > 0;) { + // no ABA here, because Next is exactly head + // it does not matter how many travels head was made/ + node->Next = head; + auto prev = AtomicGetAndCas(&HeadForCaS, node, head); + if (head == prev) { + return; + } + head = prev; + } + // boring of trying to do cas, let's just swap + + // no need for atomic here, because the next is atomic swap + node->Next = 0; + + head = AtomicSwap(&HeadForSwap, node); + if (head != nullptr) { + AtomicSet(node->Next, head); + } else { + // consumer must know if no other thread may access the memory, + // setting Next to node is a way to notify consumer + AtomicSet(node->Next, node); + } + } + + 
TIntrusiveNode* TMPSCIntrusiveUnordered::PopMany() noexcept { + if (NotReadyChain == nullptr) { + auto head = AtomicSwap(&HeadForSwap, nullptr); + NotReadyChain = head; + } + + if (NotReadyChain != nullptr) { + auto next = AtomicGet(NotReadyChain->Next); + if (next != nullptr) { + auto ready = NotReadyChain; + TIntrusiveNode* cut; + do { + cut = NotReadyChain; + NotReadyChain = next; + next = AtomicGet(NotReadyChain->Next); + if (next == NotReadyChain) { + cut = NotReadyChain; + NotReadyChain = nullptr; + break; + } + } while (next != nullptr); + cut->Next = nullptr; + return ready; + } + } + + if (AtomicGet(HeadForCaS) != nullptr) { + return AtomicSwap(&HeadForCaS, nullptr); + } + return nullptr; + } + + TIntrusiveNode* TMPSCIntrusiveUnordered::Pop() noexcept { + if (PopOneQueue != nullptr) { + auto head = PopOneQueue; + PopOneQueue = PopOneQueue->Next; + return head; + } + + PopOneQueue = PopMany(); + if (PopOneQueue != nullptr) { + auto head = PopOneQueue; + PopOneQueue = PopOneQueue->Next; + return head; + } + return nullptr; + } +} diff --git a/library/cpp/threading/queue/mpsc_intrusive_unordered.h b/library/cpp/threading/queue/mpsc_intrusive_unordered.h index 6ac7537ae9..c07cf761f6 100644 --- a/library/cpp/threading/queue/mpsc_intrusive_unordered.h +++ b/library/cpp/threading/queue/mpsc_intrusive_unordered.h @@ -1,35 +1,35 @@ -#pragma once - -/* - Simple almost-wait-free unordered queue for low contention operations. - - It's wait-free for producers. - Hanging producer can hide some items from consumer. 
- */ - -#include <util/system/types.h> - -namespace NThreading { - struct TIntrusiveNode { - TIntrusiveNode* Next; - }; - - class TMPSCIntrusiveUnordered { - public: - static constexpr ui32 NUMBER_OF_TRIES_FOR_CAS = 3; - - void Push(TIntrusiveNode* node) noexcept; - TIntrusiveNode* PopMany() noexcept; - TIntrusiveNode* Pop() noexcept; - - void Push(void* node) noexcept { - Push(reinterpret_cast<TIntrusiveNode*>(node)); - } - - private: - TIntrusiveNode* HeadForCaS = nullptr; - TIntrusiveNode* HeadForSwap = nullptr; - TIntrusiveNode* NotReadyChain = nullptr; - TIntrusiveNode* PopOneQueue = nullptr; - }; -} +#pragma once + +/* + Simple almost-wait-free unordered queue for low contention operations. + + It's wait-free for producers. + Hanging producer can hide some items from consumer. + */ + +#include <util/system/types.h> + +namespace NThreading { + struct TIntrusiveNode { + TIntrusiveNode* Next; + }; + + class TMPSCIntrusiveUnordered { + public: + static constexpr ui32 NUMBER_OF_TRIES_FOR_CAS = 3; + + void Push(TIntrusiveNode* node) noexcept; + TIntrusiveNode* PopMany() noexcept; + TIntrusiveNode* Pop() noexcept; + + void Push(void* node) noexcept { + Push(reinterpret_cast<TIntrusiveNode*>(node)); + } + + private: + TIntrusiveNode* HeadForCaS = nullptr; + TIntrusiveNode* HeadForSwap = nullptr; + TIntrusiveNode* NotReadyChain = nullptr; + TIntrusiveNode* PopOneQueue = nullptr; + }; +} diff --git a/library/cpp/threading/queue/mpsc_read_as_filled.cpp b/library/cpp/threading/queue/mpsc_read_as_filled.cpp index 8b4664a6f3..3b89fb1df6 100644 --- a/library/cpp/threading/queue/mpsc_read_as_filled.cpp +++ b/library/cpp/threading/queue/mpsc_read_as_filled.cpp @@ -1 +1 @@ -#include "mpsc_read_as_filled.h" +#include "mpsc_read_as_filled.h" diff --git a/library/cpp/threading/queue/mpsc_read_as_filled.h b/library/cpp/threading/queue/mpsc_read_as_filled.h index be33ba5a58..4dfdb1fbbf 100644 --- a/library/cpp/threading/queue/mpsc_read_as_filled.h +++ 
b/library/cpp/threading/queue/mpsc_read_as_filled.h @@ -1,611 +1,611 @@ -#pragma once - -/* - Completely wait-free queue, multiple producers - one consumer. Strict order. - The queue algorithm is using concept of virtual infinite array. - +#pragma once + +/* + Completely wait-free queue, multiple producers - one consumer. Strict order. + The queue algorithm is using concept of virtual infinite array. + A producer takes a number from a counter and atomically increments the counter. - The number taken is a number of a slot for the producer to put a new message - into infinite array. - - Then producer constructs a virtual infinite array by bidirectional linked list - of blocks. Each block contains several slots. - + The number taken is a number of a slot for the producer to put a new message + into infinite array. + + Then producer constructs a virtual infinite array by bidirectional linked list + of blocks. Each block contains several slots. + There is a hint pointer which optimistically points to the last block - of the list and never goes backward. - - Consumer exploits the property of the hint pointer always going forward - to free old blocks eventually. Consumer periodically read the hint pointer - and the counter and thus deduce producers which potentially holds the pointer - to a block. Consumer can free the block if all that producers filled their - slots and left the queue. - - No producer can stop the progress for other producers. - - Consumer can't stop the progress for producers. - Consumer can skip not-yet-filled slots and read them later. - Thus no producer can stop the progress for consumer. + of the list and never goes backward. + + Consumer exploits the property of the hint pointer always going forward + to free old blocks eventually. Consumer periodically read the hint pointer + and the counter and thus deduce producers which potentially holds the pointer + to a block. 
Consumer can free the block if all that producers filled their + slots and left the queue. + + No producer can stop the progress for other producers. + + Consumer can't stop the progress for producers. + Consumer can skip not-yet-filled slots and read them later. + Thus no producer can stop the progress for consumer. The algorithm is virtually strictly ordered because it skips slots only - if it is really does not matter in which order the slots were produced and - consumed. - - WARNING: there is no wait&notify mechanic for consumer, - consumer receives nullptr if queue was empty. - - WARNING: though the algorithm itself is completely wait-free - but producers and consumer could be blocked by memory allocator - + if it is really does not matter in which order the slots were produced and + consumed. + + WARNING: there is no wait&notify mechanic for consumer, + consumer receives nullptr if queue was empty. + + WARNING: though the algorithm itself is completely wait-free + but producers and consumer could be blocked by memory allocator + WARNING: copy constructors of the queue are not thread-safe - */ - -#include <util/generic/deque.h> -#include <util/generic/ptr.h> -#include <util/system/atomic.h> -#include <util/system/spinlock.h> - -#include "tune.h" - -namespace NThreading { - namespace NReadAsFilledPrivate { - typedef void* TMsgLink; - - static constexpr ui32 DEFAULT_BUNCH_SIZE = 251; - - struct TEmpty { - }; - - struct TEmptyAux { - TEmptyAux Retrieve() const { - return TEmptyAux(); - } - - void Store(TEmptyAux&) { - } - - static constexpr TEmptyAux Zero() { - return TEmptyAux(); - } - }; - - template <typename TAux> - struct TSlot { - TMsgLink volatile Msg; - TAux AuxiliaryData; - - inline void Store(TAux& aux) { - AuxiliaryData.Store(aux); - } - - inline TAux Retrieve() const { - return AuxiliaryData.Retrieve(); - } - - static TSlot<TAux> NullElem() { - return {nullptr, TAux::Zero()}; - } - - static TSlot<TAux> Pair(TMsgLink msg, TAux aux) { - return {msg, 
std::move(aux)}; - } - }; - - template <> - struct TSlot<TEmptyAux> { - TMsgLink volatile Msg; - - inline void Store(TEmptyAux&) { - } - - inline TEmptyAux Retrieve() const { - return TEmptyAux(); - } - - static TSlot<TEmptyAux> NullElem() { - return {nullptr}; - } - - static TSlot<TEmptyAux> Pair(TMsgLink msg, TEmptyAux) { - return {msg}; - } - }; - - enum TPushResult { - PUSH_RESULT_OK, - PUSH_RESULT_BACKWARD, - PUSH_RESULT_FORWARD, - }; - - template <ui32 BUNCH_SIZE = DEFAULT_BUNCH_SIZE, - typename TBase = TEmpty, - typename TAux = TEmptyAux> - struct TMsgBunch: public TBase { - static constexpr size_t RELEASE_SIZE = BUNCH_SIZE * 2; - - ui64 FirstSlot; - - TSlot<TAux> LinkArray[BUNCH_SIZE]; - - TMsgBunch* volatile NextBunch; - TMsgBunch* volatile BackLink; - - ui64 volatile Token; - TMsgBunch* volatile NextToken; - - /* this push can return PUSH_RESULT_BLOCKED */ + */ + +#include <util/generic/deque.h> +#include <util/generic/ptr.h> +#include <util/system/atomic.h> +#include <util/system/spinlock.h> + +#include "tune.h" + +namespace NThreading { + namespace NReadAsFilledPrivate { + typedef void* TMsgLink; + + static constexpr ui32 DEFAULT_BUNCH_SIZE = 251; + + struct TEmpty { + }; + + struct TEmptyAux { + TEmptyAux Retrieve() const { + return TEmptyAux(); + } + + void Store(TEmptyAux&) { + } + + static constexpr TEmptyAux Zero() { + return TEmptyAux(); + } + }; + + template <typename TAux> + struct TSlot { + TMsgLink volatile Msg; + TAux AuxiliaryData; + + inline void Store(TAux& aux) { + AuxiliaryData.Store(aux); + } + + inline TAux Retrieve() const { + return AuxiliaryData.Retrieve(); + } + + static TSlot<TAux> NullElem() { + return {nullptr, TAux::Zero()}; + } + + static TSlot<TAux> Pair(TMsgLink msg, TAux aux) { + return {msg, std::move(aux)}; + } + }; + + template <> + struct TSlot<TEmptyAux> { + TMsgLink volatile Msg; + + inline void Store(TEmptyAux&) { + } + + inline TEmptyAux Retrieve() const { + return TEmptyAux(); + } + + static TSlot<TEmptyAux> 
NullElem() { + return {nullptr}; + } + + static TSlot<TEmptyAux> Pair(TMsgLink msg, TEmptyAux) { + return {msg}; + } + }; + + enum TPushResult { + PUSH_RESULT_OK, + PUSH_RESULT_BACKWARD, + PUSH_RESULT_FORWARD, + }; + + template <ui32 BUNCH_SIZE = DEFAULT_BUNCH_SIZE, + typename TBase = TEmpty, + typename TAux = TEmptyAux> + struct TMsgBunch: public TBase { + static constexpr size_t RELEASE_SIZE = BUNCH_SIZE * 2; + + ui64 FirstSlot; + + TSlot<TAux> LinkArray[BUNCH_SIZE]; + + TMsgBunch* volatile NextBunch; + TMsgBunch* volatile BackLink; + + ui64 volatile Token; + TMsgBunch* volatile NextToken; + + /* this push can return PUSH_RESULT_BLOCKED */ inline TPushResult Push(TMsgLink msg, ui64 slot, TAux auxiliary) { - if (Y_UNLIKELY(slot < FirstSlot)) { - return PUSH_RESULT_BACKWARD; - } - - if (Y_UNLIKELY(slot >= FirstSlot + BUNCH_SIZE)) { - return PUSH_RESULT_FORWARD; - } - - LinkArray[slot - FirstSlot].Store(auxiliary); - - AtomicSet(LinkArray[slot - FirstSlot].Msg, msg); - return PUSH_RESULT_OK; - } - - inline bool IsSlotHere(ui64 slot) { - return slot < FirstSlot + BUNCH_SIZE; - } - - inline TMsgLink GetSlot(ui64 slot) const { - return AtomicGet(LinkArray[slot - FirstSlot].Msg); - } - - inline TSlot<TAux> GetSlotAux(ui64 slot) const { - auto msg = GetSlot(slot); - auto aux = LinkArray[slot - FirstSlot].Retrieve(); - return TSlot<TAux>::Pair(msg, aux); - } - - inline TMsgBunch* GetNextBunch() const { - return AtomicGet(NextBunch); - } - - inline bool SetNextBunch(TMsgBunch* ptr) { - return AtomicCas(&NextBunch, ptr, nullptr); - } - - inline TMsgBunch* GetBackLink() const { - return AtomicGet(BackLink); - } - - inline TMsgBunch* GetToken(ui64 slot) { - return reinterpret_cast<TMsgBunch*>( - LinkArray[slot - FirstSlot].Msg); - } - - inline void IncrementToken() { - AtomicIncrement(Token); - } - - // the object could be destroyed after this method - inline void DecrementToken() { - if (Y_UNLIKELY(AtomicDecrement(Token) == RELEASE_SIZE)) { - Release(this); - 
AtomicGet(NextToken)->DecrementToken(); - // this could be invalid here - } - } - - // the object could be destroyed after this method - inline void SetNextToken(TMsgBunch* next) { - AtomicSet(NextToken, next); + if (Y_UNLIKELY(slot < FirstSlot)) { + return PUSH_RESULT_BACKWARD; + } + + if (Y_UNLIKELY(slot >= FirstSlot + BUNCH_SIZE)) { + return PUSH_RESULT_FORWARD; + } + + LinkArray[slot - FirstSlot].Store(auxiliary); + + AtomicSet(LinkArray[slot - FirstSlot].Msg, msg); + return PUSH_RESULT_OK; + } + + inline bool IsSlotHere(ui64 slot) { + return slot < FirstSlot + BUNCH_SIZE; + } + + inline TMsgLink GetSlot(ui64 slot) const { + return AtomicGet(LinkArray[slot - FirstSlot].Msg); + } + + inline TSlot<TAux> GetSlotAux(ui64 slot) const { + auto msg = GetSlot(slot); + auto aux = LinkArray[slot - FirstSlot].Retrieve(); + return TSlot<TAux>::Pair(msg, aux); + } + + inline TMsgBunch* GetNextBunch() const { + return AtomicGet(NextBunch); + } + + inline bool SetNextBunch(TMsgBunch* ptr) { + return AtomicCas(&NextBunch, ptr, nullptr); + } + + inline TMsgBunch* GetBackLink() const { + return AtomicGet(BackLink); + } + + inline TMsgBunch* GetToken(ui64 slot) { + return reinterpret_cast<TMsgBunch*>( + LinkArray[slot - FirstSlot].Msg); + } + + inline void IncrementToken() { + AtomicIncrement(Token); + } + + // the object could be destroyed after this method + inline void DecrementToken() { + if (Y_UNLIKELY(AtomicDecrement(Token) == RELEASE_SIZE)) { + Release(this); + AtomicGet(NextToken)->DecrementToken(); + // this could be invalid here + } + } + + // the object could be destroyed after this method + inline void SetNextToken(TMsgBunch* next) { + AtomicSet(NextToken, next); if (Y_UNLIKELY(AtomicAdd(Token, RELEASE_SIZE) == RELEASE_SIZE)) { - Release(this); - next->DecrementToken(); - } - // this could be invalid here - } - - TMsgBunch(ui64 start, TMsgBunch* backLink) { - AtomicSet(FirstSlot, start); - memset(&LinkArray, 0, sizeof(LinkArray)); - AtomicSet(NextBunch, nullptr); - 
AtomicSet(BackLink, backLink); - - AtomicSet(Token, 1); - AtomicSet(NextToken, nullptr); - } - - static void Release(TMsgBunch* block) { - auto backLink = AtomicGet(block->BackLink); - if (backLink == nullptr) { - return; - } - AtomicSet(block->BackLink, nullptr); - - do { - auto bbackLink = backLink->BackLink; - delete backLink; - backLink = bbackLink; - } while (backLink != nullptr); - } - - void Destroy() { - for (auto tail = BackLink; tail != nullptr;) { - auto next = tail->BackLink; - delete tail; - tail = next; - } - - for (auto next = this; next != nullptr;) { - auto nnext = next->NextBunch; - delete next; - next = nnext; - } - } - }; - - template <ui32 BUNCH_SIZE = DEFAULT_BUNCH_SIZE, - typename TBunchBase = NReadAsFilledPrivate::TEmpty, - typename TAux = TEmptyAux> - class TWriteBucket { - public: - using TUsingAux = TAux; // for TReadBucket binding - using TBunch = TMsgBunch<BUNCH_SIZE, TBunchBase, TAux>; - - TWriteBucket(TBunch* bunch = new TBunch(0, nullptr)) { - AtomicSet(LastBunch, bunch); - AtomicSet(SlotCounter, 0); - } - - TWriteBucket(TWriteBucket&& move) - : LastBunch(move.LastBunch) - , SlotCounter(move.SlotCounter) - { - move.LastBunch = nullptr; - } - - ~TWriteBucket() { - if (LastBunch != nullptr) { - LastBunch->Destroy(); - } - } - - inline void Push(TMsgLink msg, TAux aux) { - ui64 pushSlot = AtomicGetAndIncrement(SlotCounter); - TBunch* hintBunch = GetLastBunch(); - - for (;;) { - auto hint = hintBunch->Push(msg, pushSlot, aux); - if (Y_LIKELY(hint == PUSH_RESULT_OK)) { - return; - } - HandleHint(hintBunch, hint); - } - } - - protected: - template <typename, template <typename, typename...> class> - friend class TReadBucket; - - TBunch* volatile LastBunch; // Hint - volatile ui64 SlotCounter; - - inline TBunch* GetLastBunch() const { - return AtomicGet(LastBunch); - } - - void HandleHint(TBunch*& hintBunch, TPushResult hint) { - if (Y_UNLIKELY(hint == PUSH_RESULT_BACKWARD)) { - hintBunch = hintBunch->GetBackLink(); - return; - } - - // 
PUSH_RESULT_FORWARD - auto nextBunch = hintBunch->GetNextBunch(); - - if (nextBunch == nullptr) { - auto first = hintBunch->FirstSlot + BUNCH_SIZE; - nextBunch = new TBunch(first, hintBunch); - if (Y_UNLIKELY(!hintBunch->SetNextBunch(nextBunch))) { - delete nextBunch; - nextBunch = hintBunch->GetNextBunch(); - } - } - - // hintBunch could not be freed here so it cannot be reused - // it's alright if this CAS was not succeeded, - // it means that other thread did that recently - AtomicCas(&LastBunch, nextBunch, hintBunch); - - hintBunch = nextBunch; - } - }; - + Release(this); + next->DecrementToken(); + } + // this could be invalid here + } + + TMsgBunch(ui64 start, TMsgBunch* backLink) { + AtomicSet(FirstSlot, start); + memset(&LinkArray, 0, sizeof(LinkArray)); + AtomicSet(NextBunch, nullptr); + AtomicSet(BackLink, backLink); + + AtomicSet(Token, 1); + AtomicSet(NextToken, nullptr); + } + + static void Release(TMsgBunch* block) { + auto backLink = AtomicGet(block->BackLink); + if (backLink == nullptr) { + return; + } + AtomicSet(block->BackLink, nullptr); + + do { + auto bbackLink = backLink->BackLink; + delete backLink; + backLink = bbackLink; + } while (backLink != nullptr); + } + + void Destroy() { + for (auto tail = BackLink; tail != nullptr;) { + auto next = tail->BackLink; + delete tail; + tail = next; + } + + for (auto next = this; next != nullptr;) { + auto nnext = next->NextBunch; + delete next; + next = nnext; + } + } + }; + + template <ui32 BUNCH_SIZE = DEFAULT_BUNCH_SIZE, + typename TBunchBase = NReadAsFilledPrivate::TEmpty, + typename TAux = TEmptyAux> + class TWriteBucket { + public: + using TUsingAux = TAux; // for TReadBucket binding + using TBunch = TMsgBunch<BUNCH_SIZE, TBunchBase, TAux>; + + TWriteBucket(TBunch* bunch = new TBunch(0, nullptr)) { + AtomicSet(LastBunch, bunch); + AtomicSet(SlotCounter, 0); + } + + TWriteBucket(TWriteBucket&& move) + : LastBunch(move.LastBunch) + , SlotCounter(move.SlotCounter) + { + move.LastBunch = nullptr; + } + 
+ ~TWriteBucket() { + if (LastBunch != nullptr) { + LastBunch->Destroy(); + } + } + + inline void Push(TMsgLink msg, TAux aux) { + ui64 pushSlot = AtomicGetAndIncrement(SlotCounter); + TBunch* hintBunch = GetLastBunch(); + + for (;;) { + auto hint = hintBunch->Push(msg, pushSlot, aux); + if (Y_LIKELY(hint == PUSH_RESULT_OK)) { + return; + } + HandleHint(hintBunch, hint); + } + } + + protected: + template <typename, template <typename, typename...> class> + friend class TReadBucket; + + TBunch* volatile LastBunch; // Hint + volatile ui64 SlotCounter; + + inline TBunch* GetLastBunch() const { + return AtomicGet(LastBunch); + } + + void HandleHint(TBunch*& hintBunch, TPushResult hint) { + if (Y_UNLIKELY(hint == PUSH_RESULT_BACKWARD)) { + hintBunch = hintBunch->GetBackLink(); + return; + } + + // PUSH_RESULT_FORWARD + auto nextBunch = hintBunch->GetNextBunch(); + + if (nextBunch == nullptr) { + auto first = hintBunch->FirstSlot + BUNCH_SIZE; + nextBunch = new TBunch(first, hintBunch); + if (Y_UNLIKELY(!hintBunch->SetNextBunch(nextBunch))) { + delete nextBunch; + nextBunch = hintBunch->GetNextBunch(); + } + } + + // hintBunch could not be freed here so it cannot be reused + // it's alright if this CAS was not succeeded, + // it means that other thread did that recently + AtomicCas(&LastBunch, nextBunch, hintBunch); + + hintBunch = nextBunch; + } + }; + template <typename TWBucket = TWriteBucket<>, template <typename, typename...> class TContainer = TDeque> - class TReadBucket { - public: - using TAux = typename TWBucket::TUsingAux; - using TBunch = typename TWBucket::TBunch; - - static constexpr int MAX_NUMBER_OF_TRIES_TO_READ = 5; - - TReadBucket(TWBucket* writer) - : Writer(writer) - , ReadBunch(writer->GetLastBunch()) - , LastKnownPushBunch(writer->GetLastBunch()) - { - ReadBunch->DecrementToken(); // no previous token - } - - TReadBucket(TReadBucket toCopy, TWBucket* writer) - : TReadBucket(std::move(toCopy)) - { - Writer = writer; - } - - ui64 ReadyCount() const { 
- return AtomicGet(Writer->SlotCounter) - ReadSlot; - } - - TMsgLink Pop() { - return PopAux().Msg; - } - - TMsgLink Peek() { - return PeekAux().Msg; - } - - TSlot<TAux> PopAux() { - for (;;) { - if (Y_UNLIKELY(ReadNow.size() != 0)) { - auto result = PopSkipped(); - if (Y_LIKELY(result.Msg != nullptr)) { - return result; - } - } - - if (Y_UNLIKELY(ReadSlot == LastKnownPushSlot)) { - if (Y_LIKELY(!RereadPushSlot())) { - return TSlot<TAux>::NullElem(); - } - continue; - } - - if (Y_UNLIKELY(!ReadBunch->IsSlotHere(ReadSlot))) { - if (Y_UNLIKELY(!SwitchToNextBunch())) { - return TSlot<TAux>::NullElem(); - } - } - - auto result = ReadBunch->GetSlotAux(ReadSlot); - if (Y_LIKELY(result.Msg != nullptr)) { - ++ReadSlot; - return result; - } - - result = StubbornPop(); - if (Y_LIKELY(result.Msg != nullptr)) { - return result; - } - } - } - - TSlot<TAux> PeekAux() { - for (;;) { - if (Y_UNLIKELY(ReadNow.size() != 0)) { - auto result = PeekSkipped(); - if (Y_LIKELY(result.Msg != nullptr)) { - return result; - } - } - - if (Y_UNLIKELY(ReadSlot == LastKnownPushSlot)) { - if (Y_LIKELY(!RereadPushSlot())) { - return TSlot<TAux>::NullElem(); - } - continue; - } - - if (Y_UNLIKELY(!ReadBunch->IsSlotHere(ReadSlot))) { - if (Y_UNLIKELY(!SwitchToNextBunch())) { - return TSlot<TAux>::NullElem(); - } - } - - auto result = ReadBunch->GetSlotAux(ReadSlot); - if (Y_LIKELY(result.Msg != nullptr)) { - return result; - } - - result = StubbornPeek(); - if (Y_LIKELY(result.Msg != nullptr)) { - return result; - } - } - } - - private: - TWBucket* Writer; - TBunch* ReadBunch; - ui64 ReadSlot = 0; - TBunch* LastKnownPushBunch; - ui64 LastKnownPushSlot = 0; - - struct TSkipItem { - TBunch* Bunch; - ui64 Slot; - TBunch* Token; - }; - - TContainer<TSkipItem> ReadNow; - TContainer<TSkipItem> ReadLater; - - void AddToReadLater() { - ReadLater.push_back({ReadBunch, ReadSlot, LastKnownPushBunch}); - LastKnownPushBunch->IncrementToken(); - ++ReadSlot; - } - - // MUST BE: ReadSlot == LastKnownPushSlot - bool 
RereadPushSlot() { - ReadNow = std::move(ReadLater); - ReadLater.clear(); - - auto oldSlot = LastKnownPushSlot; - - auto currentPushBunch = Writer->GetLastBunch(); - auto currentPushSlot = AtomicGet(Writer->SlotCounter); - - if (currentPushBunch != LastKnownPushBunch) { - // LastKnownPushBunch could be invalid after this line - LastKnownPushBunch->SetNextToken(currentPushBunch); - } - - LastKnownPushBunch = currentPushBunch; - LastKnownPushSlot = currentPushSlot; - - return oldSlot != LastKnownPushSlot; - } - - bool SwitchToNextBunch() { - for (int q = 0; q < MAX_NUMBER_OF_TRIES_TO_READ; ++q) { - auto next = ReadBunch->GetNextBunch(); - if (next != nullptr) { - ReadBunch = next; - return true; - } - SpinLockPause(); - } - return false; - } - - TSlot<TAux> StubbornPop() { - for (int q = 0; q < MAX_NUMBER_OF_TRIES_TO_READ; ++q) { - auto result = ReadBunch->GetSlotAux(ReadSlot); - if (Y_LIKELY(result.Msg != nullptr)) { - ++ReadSlot; - return result; - } - SpinLockPause(); - } - - AddToReadLater(); - return TSlot<TAux>::NullElem(); - } - - TSlot<TAux> StubbornPeek() { - for (int q = 0; q < MAX_NUMBER_OF_TRIES_TO_READ; ++q) { - auto result = ReadBunch->GetSlotAux(ReadSlot); - if (Y_LIKELY(result.Msg != nullptr)) { - return result; - } - SpinLockPause(); - } - - AddToReadLater(); - return TSlot<TAux>::NullElem(); - } - - TSlot<TAux> PopSkipped() { - do { - auto elem = ReadNow.front(); - ReadNow.pop_front(); - - auto result = elem.Bunch->GetSlotAux(elem.Slot); - if (Y_LIKELY(result.Msg != nullptr)) { - elem.Token->DecrementToken(); - return result; - } - - ReadLater.emplace_back(elem); - - } while (ReadNow.size() > 0); - - return TSlot<TAux>::NullElem(); - } - - TSlot<TAux> PeekSkipped() { - do { - auto elem = ReadNow.front(); - - auto result = elem.Bunch->GetSlotAux(elem.Slot); - if (Y_LIKELY(result.Msg != nullptr)) { - return result; - } - - ReadNow.pop_front(); - ReadLater.emplace_back(elem); - - } while (ReadNow.size() > 0); - - return TSlot<TAux>::NullElem(); - } - 
}; - - struct TDefaultParams { - static constexpr ui32 BUNCH_SIZE = DEFAULT_BUNCH_SIZE; - using TBunchBase = TEmpty; - + class TReadBucket { + public: + using TAux = typename TWBucket::TUsingAux; + using TBunch = typename TWBucket::TBunch; + + static constexpr int MAX_NUMBER_OF_TRIES_TO_READ = 5; + + TReadBucket(TWBucket* writer) + : Writer(writer) + , ReadBunch(writer->GetLastBunch()) + , LastKnownPushBunch(writer->GetLastBunch()) + { + ReadBunch->DecrementToken(); // no previous token + } + + TReadBucket(TReadBucket toCopy, TWBucket* writer) + : TReadBucket(std::move(toCopy)) + { + Writer = writer; + } + + ui64 ReadyCount() const { + return AtomicGet(Writer->SlotCounter) - ReadSlot; + } + + TMsgLink Pop() { + return PopAux().Msg; + } + + TMsgLink Peek() { + return PeekAux().Msg; + } + + TSlot<TAux> PopAux() { + for (;;) { + if (Y_UNLIKELY(ReadNow.size() != 0)) { + auto result = PopSkipped(); + if (Y_LIKELY(result.Msg != nullptr)) { + return result; + } + } + + if (Y_UNLIKELY(ReadSlot == LastKnownPushSlot)) { + if (Y_LIKELY(!RereadPushSlot())) { + return TSlot<TAux>::NullElem(); + } + continue; + } + + if (Y_UNLIKELY(!ReadBunch->IsSlotHere(ReadSlot))) { + if (Y_UNLIKELY(!SwitchToNextBunch())) { + return TSlot<TAux>::NullElem(); + } + } + + auto result = ReadBunch->GetSlotAux(ReadSlot); + if (Y_LIKELY(result.Msg != nullptr)) { + ++ReadSlot; + return result; + } + + result = StubbornPop(); + if (Y_LIKELY(result.Msg != nullptr)) { + return result; + } + } + } + + TSlot<TAux> PeekAux() { + for (;;) { + if (Y_UNLIKELY(ReadNow.size() != 0)) { + auto result = PeekSkipped(); + if (Y_LIKELY(result.Msg != nullptr)) { + return result; + } + } + + if (Y_UNLIKELY(ReadSlot == LastKnownPushSlot)) { + if (Y_LIKELY(!RereadPushSlot())) { + return TSlot<TAux>::NullElem(); + } + continue; + } + + if (Y_UNLIKELY(!ReadBunch->IsSlotHere(ReadSlot))) { + if (Y_UNLIKELY(!SwitchToNextBunch())) { + return TSlot<TAux>::NullElem(); + } + } + + auto result = ReadBunch->GetSlotAux(ReadSlot); + 
if (Y_LIKELY(result.Msg != nullptr)) { + return result; + } + + result = StubbornPeek(); + if (Y_LIKELY(result.Msg != nullptr)) { + return result; + } + } + } + + private: + TWBucket* Writer; + TBunch* ReadBunch; + ui64 ReadSlot = 0; + TBunch* LastKnownPushBunch; + ui64 LastKnownPushSlot = 0; + + struct TSkipItem { + TBunch* Bunch; + ui64 Slot; + TBunch* Token; + }; + + TContainer<TSkipItem> ReadNow; + TContainer<TSkipItem> ReadLater; + + void AddToReadLater() { + ReadLater.push_back({ReadBunch, ReadSlot, LastKnownPushBunch}); + LastKnownPushBunch->IncrementToken(); + ++ReadSlot; + } + + // MUST BE: ReadSlot == LastKnownPushSlot + bool RereadPushSlot() { + ReadNow = std::move(ReadLater); + ReadLater.clear(); + + auto oldSlot = LastKnownPushSlot; + + auto currentPushBunch = Writer->GetLastBunch(); + auto currentPushSlot = AtomicGet(Writer->SlotCounter); + + if (currentPushBunch != LastKnownPushBunch) { + // LastKnownPushBunch could be invalid after this line + LastKnownPushBunch->SetNextToken(currentPushBunch); + } + + LastKnownPushBunch = currentPushBunch; + LastKnownPushSlot = currentPushSlot; + + return oldSlot != LastKnownPushSlot; + } + + bool SwitchToNextBunch() { + for (int q = 0; q < MAX_NUMBER_OF_TRIES_TO_READ; ++q) { + auto next = ReadBunch->GetNextBunch(); + if (next != nullptr) { + ReadBunch = next; + return true; + } + SpinLockPause(); + } + return false; + } + + TSlot<TAux> StubbornPop() { + for (int q = 0; q < MAX_NUMBER_OF_TRIES_TO_READ; ++q) { + auto result = ReadBunch->GetSlotAux(ReadSlot); + if (Y_LIKELY(result.Msg != nullptr)) { + ++ReadSlot; + return result; + } + SpinLockPause(); + } + + AddToReadLater(); + return TSlot<TAux>::NullElem(); + } + + TSlot<TAux> StubbornPeek() { + for (int q = 0; q < MAX_NUMBER_OF_TRIES_TO_READ; ++q) { + auto result = ReadBunch->GetSlotAux(ReadSlot); + if (Y_LIKELY(result.Msg != nullptr)) { + return result; + } + SpinLockPause(); + } + + AddToReadLater(); + return TSlot<TAux>::NullElem(); + } + + TSlot<TAux> 
PopSkipped() { + do { + auto elem = ReadNow.front(); + ReadNow.pop_front(); + + auto result = elem.Bunch->GetSlotAux(elem.Slot); + if (Y_LIKELY(result.Msg != nullptr)) { + elem.Token->DecrementToken(); + return result; + } + + ReadLater.emplace_back(elem); + + } while (ReadNow.size() > 0); + + return TSlot<TAux>::NullElem(); + } + + TSlot<TAux> PeekSkipped() { + do { + auto elem = ReadNow.front(); + + auto result = elem.Bunch->GetSlotAux(elem.Slot); + if (Y_LIKELY(result.Msg != nullptr)) { + return result; + } + + ReadNow.pop_front(); + ReadLater.emplace_back(elem); + + } while (ReadNow.size() > 0); + + return TSlot<TAux>::NullElem(); + } + }; + + struct TDefaultParams { + static constexpr ui32 BUNCH_SIZE = DEFAULT_BUNCH_SIZE; + using TBunchBase = TEmpty; + template <typename TElem, typename... TRest> using TContainer = TDeque<TElem, TRest...>; - - static constexpr bool DeleteItems = true; - }; - - } //namespace NReadAsFilledPrivate - - DeclareTuneValueParam(TRaFQueueBunchSize, ui32, BUNCH_SIZE); - DeclareTuneTypeParam(TRaFQueueBunchBase, TBunchBase); - DeclareTuneContainer(TRaFQueueSkipContainer, TContainer); - DeclareTuneValueParam(TRaFQueueDeleteItems, bool, DeleteItems); - + + static constexpr bool DeleteItems = true; + }; + + } //namespace NReadAsFilledPrivate + + DeclareTuneValueParam(TRaFQueueBunchSize, ui32, BUNCH_SIZE); + DeclareTuneTypeParam(TRaFQueueBunchBase, TBunchBase); + DeclareTuneContainer(TRaFQueueSkipContainer, TContainer); + DeclareTuneValueParam(TRaFQueueDeleteItems, bool, DeleteItems); + template <typename TItem = void, typename... 
TParams> - class TReadAsFilledQueue { - private: - using TTuned = TTune<NReadAsFilledPrivate::TDefaultParams, TParams...>; - - static constexpr ui32 BUNCH_SIZE = TTuned::BUNCH_SIZE; - - using TBunchBase = typename TTuned::TBunchBase; - + class TReadAsFilledQueue { + private: + using TTuned = TTune<NReadAsFilledPrivate::TDefaultParams, TParams...>; + + static constexpr ui32 BUNCH_SIZE = TTuned::BUNCH_SIZE; + + using TBunchBase = typename TTuned::TBunchBase; + template <typename TElem, typename... TRest> - using TContainer = - typename TTuned::template TContainer<TElem, TRest...>; - - using TWriteBucket = - NReadAsFilledPrivate::TWriteBucket<BUNCH_SIZE, TBunchBase>; - using TReadBucket = - NReadAsFilledPrivate::TReadBucket<TWriteBucket, TContainer>; - - public: - TReadAsFilledQueue() - : RBucket(&WBucket) - { - } - - ~TReadAsFilledQueue() { - if (TTuned::DeleteItems) { - for (;;) { - auto msg = Pop(); - if (msg == nullptr) { - break; - } - TDelete::Destroy(msg); - } - } - } - - void Push(TItem* msg) { - WBucket.Push((void*)msg, NReadAsFilledPrivate::TEmptyAux()); - } - - TItem* Pop() { - return (TItem*)RBucket.Pop(); - } - - TItem* Peek() { - return (TItem*)RBucket.Peek(); - } - - protected: - TWriteBucket WBucket; - TReadBucket RBucket; - }; -} + using TContainer = + typename TTuned::template TContainer<TElem, TRest...>; + + using TWriteBucket = + NReadAsFilledPrivate::TWriteBucket<BUNCH_SIZE, TBunchBase>; + using TReadBucket = + NReadAsFilledPrivate::TReadBucket<TWriteBucket, TContainer>; + + public: + TReadAsFilledQueue() + : RBucket(&WBucket) + { + } + + ~TReadAsFilledQueue() { + if (TTuned::DeleteItems) { + for (;;) { + auto msg = Pop(); + if (msg == nullptr) { + break; + } + TDelete::Destroy(msg); + } + } + } + + void Push(TItem* msg) { + WBucket.Push((void*)msg, NReadAsFilledPrivate::TEmptyAux()); + } + + TItem* Pop() { + return (TItem*)RBucket.Pop(); + } + + TItem* Peek() { + return (TItem*)RBucket.Peek(); + } + + protected: + TWriteBucket WBucket; + 
TReadBucket RBucket; + }; +} diff --git a/library/cpp/threading/queue/mpsc_vinfarr_obstructive.cpp b/library/cpp/threading/queue/mpsc_vinfarr_obstructive.cpp index 2bd0c29821..00dbfeaa64 100644 --- a/library/cpp/threading/queue/mpsc_vinfarr_obstructive.cpp +++ b/library/cpp/threading/queue/mpsc_vinfarr_obstructive.cpp @@ -1 +1 @@ -#include "mpsc_vinfarr_obstructive.h" +#include "mpsc_vinfarr_obstructive.h" diff --git a/library/cpp/threading/queue/mpsc_vinfarr_obstructive.h b/library/cpp/threading/queue/mpsc_vinfarr_obstructive.h index 5f91f1b5a8..3e1ae92342 100644 --- a/library/cpp/threading/queue/mpsc_vinfarr_obstructive.h +++ b/library/cpp/threading/queue/mpsc_vinfarr_obstructive.h @@ -1,528 +1,528 @@ -#pragma once - -/* - Semi-wait-free queue, multiple producers - one consumer. Strict order. - The queue algorithm is using concept of virtual infinite array. - - A producer takes a number from a counter and atomicaly increments the counter. - The number taken is a number of a slot for the producer to put a new message - into infinite array. - - Then producer constructs a virtual infinite array by bidirectional linked list - of blocks. Each block contains several slots. - - There is a hint pointer which optimisticly points to the last block - of the list and never goes backward. - - Consumer exploits the property of the hint pointer always going forward - to free old blocks eventually. Consumer periodically read the hint pointer - and the counter and thus deduce producers which potentially holds the pointer - to a block. Consumer can free the block if all that producers filled their - slots and left the queue. - - No producer can stop the progress for other producers. - - Consumer can obstruct a slot of a delayed producer by putting special mark. - Thus no producer can stop the progress for consumer. - But a slow producer may be forced to retry unlimited number of times. - Though it's very unlikely for a non-preempted producer to be obstructed. 
- That's why the algorithm is semi-wait-free. - - WARNING: there is no wait¬ify mechanic for consumer, - consumer receives nullptr if queue was empty. - - WARNING: though the algorithm itself is lock-free - but producers and consumer could be blocked by memory allocator - - WARNING: copy constructers of the queue are not thread-safe - */ - -#include <util/generic/noncopyable.h> -#include <util/generic/ptr.h> -#include <util/system/atomic.h> -#include <util/system/spinlock.h> - -#include "tune.h" - -namespace NThreading { - namespace NObstructiveQueuePrivate { - typedef void* TMsgLink; - - struct TEmpty { - }; - - struct TEmptyAux { - TEmptyAux Retrieve() const { - return TEmptyAux(); - } - void Store(TEmptyAux&) { - } - static constexpr TEmptyAux Zero() { - return TEmptyAux(); - } - }; - - template <typename TAux> - struct TSlot { - TMsgLink volatile Msg; - TAux AuxiliaryData; - - inline void Store(TAux& aux) { - AuxiliaryData.Store(aux); - } - - inline TAux Retrieve() const { - return AuxiliaryData.Retrieve(); - } - - static TSlot<TAux> NullElem() { - return {nullptr, TAux::Zero()}; - } - - static TSlot<TAux> Pair(TMsgLink msg, TAux aux) { - return {msg, std::move(aux)}; - } - }; - - template <> - struct TSlot<TEmptyAux> { - TMsgLink volatile Msg; - inline void Store(TEmptyAux&) { - } - inline TEmptyAux Retrieve() const { - return TEmptyAux(); - } - - static TSlot<TEmptyAux> NullElem() { - return {nullptr}; - } - - static TSlot<TEmptyAux> Pair(TMsgLink msg, TEmptyAux) { - return {msg}; - } - }; - - enum TPushResult { - PUSH_RESULT_OK, - PUSH_RESULT_BACKWARD, - PUSH_RESULT_FORWARD, - PUSH_RESULT_BLOCKED, - }; - - template <typename TAux, ui32 BUNCH_SIZE, typename TBase = TEmpty> - struct TMsgBunch: public TBase { - ui64 FirstSlot; - - TSlot<TAux> LinkArray[BUNCH_SIZE]; - - TMsgBunch* volatile NextBunch; - TMsgBunch* volatile BackLink; - - ui64 volatile Token; - TMsgBunch* volatile NextToken; - - /* this push can return PUSH_RESULT_BLOCKED */ - inline TPushResult 
Push(TMsgLink msg, ui64 slot, TAux auxiliary) { - if (Y_UNLIKELY(slot < FirstSlot)) { - return PUSH_RESULT_BACKWARD; - } - - if (Y_UNLIKELY(slot >= FirstSlot + BUNCH_SIZE)) { - return PUSH_RESULT_FORWARD; - } - - LinkArray[slot - FirstSlot].Store(auxiliary); - - auto oldValue = AtomicSwap(&LinkArray[slot - FirstSlot].Msg, msg); - - if (Y_LIKELY(oldValue == nullptr)) { - return PUSH_RESULT_OK; - } else { - LeaveBlocked(oldValue); - return PUSH_RESULT_BLOCKED; - } - } - - inline bool IsSlotHere(ui64 slot) { - return slot < FirstSlot + BUNCH_SIZE; - } - - inline TMsgLink GetSlot(ui64 slot) const { - return AtomicGet(LinkArray[slot - FirstSlot].Msg); - } - - inline TSlot<TAux> GetSlotAux(ui64 slot) const { - auto msg = GetSlot(slot); - auto aux = LinkArray[slot - FirstSlot].Retrieve(); - return TSlot<TAux>::Pair(msg, aux); - } - - void LeaveBlocked(ui64 slot) { - auto token = GetToken(slot); - token->DecrementToken(); - } - - void LeaveBlocked(TMsgLink msg) { - auto token = reinterpret_cast<TMsgBunch*>(msg); - token->DecrementToken(); - } - - TSlot<TAux> BlockSlotAux(ui64 slot, TMsgBunch* token) { - auto old = - AtomicSwap(&LinkArray[slot - FirstSlot].Msg, (TMsgLink)token); - if (old == nullptr) { - // It's valid to increment after AtomicCas - // because token will release data only after SetNextToken - token->IncrementToken(); - return TSlot<TAux>::NullElem(); - } - return TSlot<TAux>::Pair(old, LinkArray[slot - FirstSlot].Retrieve()); - } - - inline TMsgBunch* GetNextBunch() const { - return AtomicGet(NextBunch); - } - - inline bool SetNextBunch(TMsgBunch* ptr) { - return AtomicCas(&NextBunch, ptr, nullptr); - } - - inline TMsgBunch* GetBackLink() const { - return AtomicGet(BackLink); - } - - inline TMsgBunch* GetToken(ui64 slot) { - return reinterpret_cast<TMsgBunch*>(LinkArray[slot - FirstSlot].Msg); - } - - inline void IncrementToken() { - AtomicIncrement(Token); - } - - // the object could be destroyed after this method - inline void DecrementToken() { - if 
(Y_UNLIKELY(AtomicDecrement(Token) == BUNCH_SIZE)) { - Release(this); - AtomicGet(NextToken)->DecrementToken(); - // this could be invalid here - } - } - - // the object could be destroyed after this method - inline void SetNextToken(TMsgBunch* next) { - AtomicSet(NextToken, next); - if (Y_UNLIKELY(AtomicAdd(Token, BUNCH_SIZE) == BUNCH_SIZE)) { - Release(this); - next->DecrementToken(); - } - // this could be invalid here - } - - TMsgBunch(ui64 start, TMsgBunch* backLink) { - AtomicSet(FirstSlot, start); - memset(&LinkArray, 0, sizeof(LinkArray)); - AtomicSet(NextBunch, nullptr); - AtomicSet(BackLink, backLink); - - AtomicSet(Token, 1); - AtomicSet(NextToken, nullptr); - } - - static void Release(TMsgBunch* bunch) { - auto backLink = AtomicGet(bunch->BackLink); - if (backLink == nullptr) { - return; - } - AtomicSet(bunch->BackLink, nullptr); - - do { - auto bbackLink = backLink->BackLink; - delete backLink; - backLink = bbackLink; - } while (backLink != nullptr); - } - - void Destroy() { - for (auto tail = BackLink; tail != nullptr;) { - auto next = tail->BackLink; - delete tail; - tail = next; - } - - for (auto next = this; next != nullptr;) { - auto nnext = next->NextBunch; - delete next; - next = nnext; - } - } - }; - - template <typename TAux, ui32 BUNCH_SIZE, typename TBunchBase = TEmpty> - class TWriteBucket { - public: - static const ui64 GROSS_SIZE; - - using TBunch = TMsgBunch<TAux, BUNCH_SIZE, TBunchBase>; - - TWriteBucket(TBunch* bunch = new TBunch(0, nullptr)) - : LastBunch(bunch) - , SlotCounter(0) - { - } - - TWriteBucket(TWriteBucket&& move) - : LastBunch(move.LastBunch) - , SlotCounter(move.SlotCounter) - { - move.LastBunch = nullptr; - } - - ~TWriteBucket() { - if (LastBunch != nullptr) { - LastBunch->Destroy(); - } - } - - inline bool Push(TMsgLink msg, TAux aux) { - ui64 pushSlot = AtomicGetAndIncrement(SlotCounter); - TBunch* hintBunch = GetLastBunch(); - - for (;;) { - auto hint = hintBunch->Push(msg, pushSlot, aux); - if (Y_LIKELY(hint == 
PUSH_RESULT_OK)) { - return true; - } - bool hhResult = HandleHint(hintBunch, hint); - if (Y_UNLIKELY(!hhResult)) { - return false; - } - } - } - - protected: - template <typename, ui32, typename> - friend class TReadBucket; - - TBunch* volatile LastBunch; // Hint - volatile ui64 SlotCounter; - - inline TBunch* GetLastBunch() const { - return AtomicGet(LastBunch); - } - - bool HandleHint(TBunch*& hintBunch, TPushResult hint) { - if (Y_UNLIKELY(hint == PUSH_RESULT_BLOCKED)) { - return false; - } - - if (Y_UNLIKELY(hint == PUSH_RESULT_BACKWARD)) { - hintBunch = hintBunch->GetBackLink(); - return true; - } - - // PUSH_RESULT_FORWARD - auto nextBunch = hintBunch->GetNextBunch(); - - if (nextBunch == nullptr) { - auto first = hintBunch->FirstSlot + BUNCH_SIZE; - nextBunch = new TBunch(first, hintBunch); - if (Y_UNLIKELY(!hintBunch->SetNextBunch(nextBunch))) { - delete nextBunch; - nextBunch = hintBunch->GetNextBunch(); - } - } - - // hintBunch could not be freed here so it cannot be reused - // it's alright if this CAS was not succeeded, - // it means that other thread did that recently - AtomicCas(&LastBunch, nextBunch, hintBunch); - - hintBunch = nextBunch; - return true; - } - }; - - template <typename TAux, ui32 BUNCH_SIZE, typename TBunchBase> - class TReadBucket { - public: - static constexpr int MAX_NUMBER_OF_TRIES_TO_READ = 20; - - using TWBucket = TWriteBucket<TAux, BUNCH_SIZE, TBunchBase>; - using TBunch = TMsgBunch<TAux, BUNCH_SIZE, TBunchBase>; - - TReadBucket(TWBucket* writer) - : Writer(writer) - , ReadBunch(writer->GetLastBunch()) - , LastKnownPushBunch(writer->GetLastBunch()) - { - ReadBunch->DecrementToken(); // no previous token - } - - TReadBucket(TReadBucket toCopy, TWBucket* writer) - : TReadBucket(std::move(toCopy)) - { - Writer = writer; - } - - ui64 ReadyCount() const { - return AtomicGet(Writer->SlotCounter) - ReadSlot; - } - - inline TMsgLink Pop() { - return PopAux().Msg; - } - - inline TSlot<TAux> PopAux() { - for (;;) { - if 
(Y_UNLIKELY(ReadSlot == LastKnownPushSlot)) { - if (Y_LIKELY(!RereadPushSlot())) { - return TSlot<TAux>::NullElem(); - } - } - - if (Y_UNLIKELY(!ReadBunch->IsSlotHere(ReadSlot))) { - if (Y_UNLIKELY(!SwitchToNextBunch())) { - return TSlot<TAux>::NullElem(); - } - } - - auto result = ReadBunch->GetSlotAux(ReadSlot); - if (Y_LIKELY(result.Msg != nullptr)) { - ++ReadSlot; - return result; - } - - if (ReadSlot + 1 == AtomicGet(Writer->SlotCounter)) { - return TSlot<TAux>::NullElem(); - } - - result = StubbornPopAux(); - - if (result.Msg != nullptr) { - return result; - } - } - } - - private: - TWBucket* Writer; - TBunch* ReadBunch; - ui64 ReadSlot = 0; - TBunch* LastKnownPushBunch; - ui64 LastKnownPushSlot = 0; - - // MUST BE: ReadSlot == LastKnownPushSlot - bool RereadPushSlot() { - auto oldSlot = LastKnownPushSlot; - - auto currentPushBunch = Writer->GetLastBunch(); - auto currentPushSlot = AtomicGet(Writer->SlotCounter); - - if (currentPushBunch != LastKnownPushBunch) { - // LastKnownPushBunch could be invalid after this line - LastKnownPushBunch->SetNextToken(currentPushBunch); - } - - LastKnownPushBunch = currentPushBunch; - LastKnownPushSlot = currentPushSlot; - - return oldSlot != LastKnownPushSlot; - } - - bool SwitchToNextBunch() { - for (int q = 0; q < MAX_NUMBER_OF_TRIES_TO_READ; ++q) { - auto next = ReadBunch->GetNextBunch(); - if (next != nullptr) { - ReadBunch = next; - return true; - } - SpinLockPause(); - } - return false; - } - - TSlot<TAux> StubbornPopAux() { - for (int q = 0; q < MAX_NUMBER_OF_TRIES_TO_READ; ++q) { - auto result = ReadBunch->GetSlotAux(ReadSlot); - if (Y_LIKELY(result.Msg != nullptr)) { - ++ReadSlot; - return result; - } - SpinLockPause(); - } - - return ReadBunch->BlockSlotAux(ReadSlot++, LastKnownPushBunch); - } - }; - - struct TDefaultParams { - static constexpr bool DeleteItems = true; - using TAux = NObstructiveQueuePrivate::TEmptyAux; - using TBunchBase = NObstructiveQueuePrivate::TEmpty; - static constexpr ui32 BUNCH_SIZE = 
251; - }; - - } //namespace NObstructiveQueuePrivate - - DeclareTuneValueParam(TObstructiveQueueBunchSize, ui32, BUNCH_SIZE); - DeclareTuneValueParam(TObstructiveQueueDeleteItems, bool, DeleteItems); - DeclareTuneTypeParam(TObstructiveQueueBunchBase, TBunchBase); - DeclareTuneTypeParam(TObstructiveQueueAux, TAux); - +#pragma once + +/* + Semi-wait-free queue, multiple producers - one consumer. Strict order. + The queue algorithm is using concept of virtual infinite array. + + A producer takes a number from a counter and atomicaly increments the counter. + The number taken is a number of a slot for the producer to put a new message + into infinite array. + + Then producer constructs a virtual infinite array by bidirectional linked list + of blocks. Each block contains several slots. + + There is a hint pointer which optimisticly points to the last block + of the list and never goes backward. + + Consumer exploits the property of the hint pointer always going forward + to free old blocks eventually. Consumer periodically read the hint pointer + and the counter and thus deduce producers which potentially holds the pointer + to a block. Consumer can free the block if all that producers filled their + slots and left the queue. + + No producer can stop the progress for other producers. + + Consumer can obstruct a slot of a delayed producer by putting special mark. + Thus no producer can stop the progress for consumer. + But a slow producer may be forced to retry unlimited number of times. + Though it's very unlikely for a non-preempted producer to be obstructed. + That's why the algorithm is semi-wait-free. + + WARNING: there is no wait¬ify mechanic for consumer, + consumer receives nullptr if queue was empty. 
+ + WARNING: though the algorithm itself is lock-free + but producers and consumer could be blocked by memory allocator + + WARNING: copy constructers of the queue are not thread-safe + */ + +#include <util/generic/noncopyable.h> +#include <util/generic/ptr.h> +#include <util/system/atomic.h> +#include <util/system/spinlock.h> + +#include "tune.h" + +namespace NThreading { + namespace NObstructiveQueuePrivate { + typedef void* TMsgLink; + + struct TEmpty { + }; + + struct TEmptyAux { + TEmptyAux Retrieve() const { + return TEmptyAux(); + } + void Store(TEmptyAux&) { + } + static constexpr TEmptyAux Zero() { + return TEmptyAux(); + } + }; + + template <typename TAux> + struct TSlot { + TMsgLink volatile Msg; + TAux AuxiliaryData; + + inline void Store(TAux& aux) { + AuxiliaryData.Store(aux); + } + + inline TAux Retrieve() const { + return AuxiliaryData.Retrieve(); + } + + static TSlot<TAux> NullElem() { + return {nullptr, TAux::Zero()}; + } + + static TSlot<TAux> Pair(TMsgLink msg, TAux aux) { + return {msg, std::move(aux)}; + } + }; + + template <> + struct TSlot<TEmptyAux> { + TMsgLink volatile Msg; + inline void Store(TEmptyAux&) { + } + inline TEmptyAux Retrieve() const { + return TEmptyAux(); + } + + static TSlot<TEmptyAux> NullElem() { + return {nullptr}; + } + + static TSlot<TEmptyAux> Pair(TMsgLink msg, TEmptyAux) { + return {msg}; + } + }; + + enum TPushResult { + PUSH_RESULT_OK, + PUSH_RESULT_BACKWARD, + PUSH_RESULT_FORWARD, + PUSH_RESULT_BLOCKED, + }; + + template <typename TAux, ui32 BUNCH_SIZE, typename TBase = TEmpty> + struct TMsgBunch: public TBase { + ui64 FirstSlot; + + TSlot<TAux> LinkArray[BUNCH_SIZE]; + + TMsgBunch* volatile NextBunch; + TMsgBunch* volatile BackLink; + + ui64 volatile Token; + TMsgBunch* volatile NextToken; + + /* this push can return PUSH_RESULT_BLOCKED */ + inline TPushResult Push(TMsgLink msg, ui64 slot, TAux auxiliary) { + if (Y_UNLIKELY(slot < FirstSlot)) { + return PUSH_RESULT_BACKWARD; + } + + if (Y_UNLIKELY(slot >= 
FirstSlot + BUNCH_SIZE)) { + return PUSH_RESULT_FORWARD; + } + + LinkArray[slot - FirstSlot].Store(auxiliary); + + auto oldValue = AtomicSwap(&LinkArray[slot - FirstSlot].Msg, msg); + + if (Y_LIKELY(oldValue == nullptr)) { + return PUSH_RESULT_OK; + } else { + LeaveBlocked(oldValue); + return PUSH_RESULT_BLOCKED; + } + } + + inline bool IsSlotHere(ui64 slot) { + return slot < FirstSlot + BUNCH_SIZE; + } + + inline TMsgLink GetSlot(ui64 slot) const { + return AtomicGet(LinkArray[slot - FirstSlot].Msg); + } + + inline TSlot<TAux> GetSlotAux(ui64 slot) const { + auto msg = GetSlot(slot); + auto aux = LinkArray[slot - FirstSlot].Retrieve(); + return TSlot<TAux>::Pair(msg, aux); + } + + void LeaveBlocked(ui64 slot) { + auto token = GetToken(slot); + token->DecrementToken(); + } + + void LeaveBlocked(TMsgLink msg) { + auto token = reinterpret_cast<TMsgBunch*>(msg); + token->DecrementToken(); + } + + TSlot<TAux> BlockSlotAux(ui64 slot, TMsgBunch* token) { + auto old = + AtomicSwap(&LinkArray[slot - FirstSlot].Msg, (TMsgLink)token); + if (old == nullptr) { + // It's valid to increment after AtomicCas + // because token will release data only after SetNextToken + token->IncrementToken(); + return TSlot<TAux>::NullElem(); + } + return TSlot<TAux>::Pair(old, LinkArray[slot - FirstSlot].Retrieve()); + } + + inline TMsgBunch* GetNextBunch() const { + return AtomicGet(NextBunch); + } + + inline bool SetNextBunch(TMsgBunch* ptr) { + return AtomicCas(&NextBunch, ptr, nullptr); + } + + inline TMsgBunch* GetBackLink() const { + return AtomicGet(BackLink); + } + + inline TMsgBunch* GetToken(ui64 slot) { + return reinterpret_cast<TMsgBunch*>(LinkArray[slot - FirstSlot].Msg); + } + + inline void IncrementToken() { + AtomicIncrement(Token); + } + + // the object could be destroyed after this method + inline void DecrementToken() { + if (Y_UNLIKELY(AtomicDecrement(Token) == BUNCH_SIZE)) { + Release(this); + AtomicGet(NextToken)->DecrementToken(); + // this could be invalid here + } + } + 
+ // the object could be destroyed after this method + inline void SetNextToken(TMsgBunch* next) { + AtomicSet(NextToken, next); + if (Y_UNLIKELY(AtomicAdd(Token, BUNCH_SIZE) == BUNCH_SIZE)) { + Release(this); + next->DecrementToken(); + } + // this could be invalid here + } + + TMsgBunch(ui64 start, TMsgBunch* backLink) { + AtomicSet(FirstSlot, start); + memset(&LinkArray, 0, sizeof(LinkArray)); + AtomicSet(NextBunch, nullptr); + AtomicSet(BackLink, backLink); + + AtomicSet(Token, 1); + AtomicSet(NextToken, nullptr); + } + + static void Release(TMsgBunch* bunch) { + auto backLink = AtomicGet(bunch->BackLink); + if (backLink == nullptr) { + return; + } + AtomicSet(bunch->BackLink, nullptr); + + do { + auto bbackLink = backLink->BackLink; + delete backLink; + backLink = bbackLink; + } while (backLink != nullptr); + } + + void Destroy() { + for (auto tail = BackLink; tail != nullptr;) { + auto next = tail->BackLink; + delete tail; + tail = next; + } + + for (auto next = this; next != nullptr;) { + auto nnext = next->NextBunch; + delete next; + next = nnext; + } + } + }; + + template <typename TAux, ui32 BUNCH_SIZE, typename TBunchBase = TEmpty> + class TWriteBucket { + public: + static const ui64 GROSS_SIZE; + + using TBunch = TMsgBunch<TAux, BUNCH_SIZE, TBunchBase>; + + TWriteBucket(TBunch* bunch = new TBunch(0, nullptr)) + : LastBunch(bunch) + , SlotCounter(0) + { + } + + TWriteBucket(TWriteBucket&& move) + : LastBunch(move.LastBunch) + , SlotCounter(move.SlotCounter) + { + move.LastBunch = nullptr; + } + + ~TWriteBucket() { + if (LastBunch != nullptr) { + LastBunch->Destroy(); + } + } + + inline bool Push(TMsgLink msg, TAux aux) { + ui64 pushSlot = AtomicGetAndIncrement(SlotCounter); + TBunch* hintBunch = GetLastBunch(); + + for (;;) { + auto hint = hintBunch->Push(msg, pushSlot, aux); + if (Y_LIKELY(hint == PUSH_RESULT_OK)) { + return true; + } + bool hhResult = HandleHint(hintBunch, hint); + if (Y_UNLIKELY(!hhResult)) { + return false; + } + } + } + + protected: 
+ template <typename, ui32, typename> + friend class TReadBucket; + + TBunch* volatile LastBunch; // Hint + volatile ui64 SlotCounter; + + inline TBunch* GetLastBunch() const { + return AtomicGet(LastBunch); + } + + bool HandleHint(TBunch*& hintBunch, TPushResult hint) { + if (Y_UNLIKELY(hint == PUSH_RESULT_BLOCKED)) { + return false; + } + + if (Y_UNLIKELY(hint == PUSH_RESULT_BACKWARD)) { + hintBunch = hintBunch->GetBackLink(); + return true; + } + + // PUSH_RESULT_FORWARD + auto nextBunch = hintBunch->GetNextBunch(); + + if (nextBunch == nullptr) { + auto first = hintBunch->FirstSlot + BUNCH_SIZE; + nextBunch = new TBunch(first, hintBunch); + if (Y_UNLIKELY(!hintBunch->SetNextBunch(nextBunch))) { + delete nextBunch; + nextBunch = hintBunch->GetNextBunch(); + } + } + + // hintBunch could not be freed here so it cannot be reused + // it's alright if this CAS was not succeeded, + // it means that other thread did that recently + AtomicCas(&LastBunch, nextBunch, hintBunch); + + hintBunch = nextBunch; + return true; + } + }; + + template <typename TAux, ui32 BUNCH_SIZE, typename TBunchBase> + class TReadBucket { + public: + static constexpr int MAX_NUMBER_OF_TRIES_TO_READ = 20; + + using TWBucket = TWriteBucket<TAux, BUNCH_SIZE, TBunchBase>; + using TBunch = TMsgBunch<TAux, BUNCH_SIZE, TBunchBase>; + + TReadBucket(TWBucket* writer) + : Writer(writer) + , ReadBunch(writer->GetLastBunch()) + , LastKnownPushBunch(writer->GetLastBunch()) + { + ReadBunch->DecrementToken(); // no previous token + } + + TReadBucket(TReadBucket toCopy, TWBucket* writer) + : TReadBucket(std::move(toCopy)) + { + Writer = writer; + } + + ui64 ReadyCount() const { + return AtomicGet(Writer->SlotCounter) - ReadSlot; + } + + inline TMsgLink Pop() { + return PopAux().Msg; + } + + inline TSlot<TAux> PopAux() { + for (;;) { + if (Y_UNLIKELY(ReadSlot == LastKnownPushSlot)) { + if (Y_LIKELY(!RereadPushSlot())) { + return TSlot<TAux>::NullElem(); + } + } + + if 
(Y_UNLIKELY(!ReadBunch->IsSlotHere(ReadSlot))) { + if (Y_UNLIKELY(!SwitchToNextBunch())) { + return TSlot<TAux>::NullElem(); + } + } + + auto result = ReadBunch->GetSlotAux(ReadSlot); + if (Y_LIKELY(result.Msg != nullptr)) { + ++ReadSlot; + return result; + } + + if (ReadSlot + 1 == AtomicGet(Writer->SlotCounter)) { + return TSlot<TAux>::NullElem(); + } + + result = StubbornPopAux(); + + if (result.Msg != nullptr) { + return result; + } + } + } + + private: + TWBucket* Writer; + TBunch* ReadBunch; + ui64 ReadSlot = 0; + TBunch* LastKnownPushBunch; + ui64 LastKnownPushSlot = 0; + + // MUST BE: ReadSlot == LastKnownPushSlot + bool RereadPushSlot() { + auto oldSlot = LastKnownPushSlot; + + auto currentPushBunch = Writer->GetLastBunch(); + auto currentPushSlot = AtomicGet(Writer->SlotCounter); + + if (currentPushBunch != LastKnownPushBunch) { + // LastKnownPushBunch could be invalid after this line + LastKnownPushBunch->SetNextToken(currentPushBunch); + } + + LastKnownPushBunch = currentPushBunch; + LastKnownPushSlot = currentPushSlot; + + return oldSlot != LastKnownPushSlot; + } + + bool SwitchToNextBunch() { + for (int q = 0; q < MAX_NUMBER_OF_TRIES_TO_READ; ++q) { + auto next = ReadBunch->GetNextBunch(); + if (next != nullptr) { + ReadBunch = next; + return true; + } + SpinLockPause(); + } + return false; + } + + TSlot<TAux> StubbornPopAux() { + for (int q = 0; q < MAX_NUMBER_OF_TRIES_TO_READ; ++q) { + auto result = ReadBunch->GetSlotAux(ReadSlot); + if (Y_LIKELY(result.Msg != nullptr)) { + ++ReadSlot; + return result; + } + SpinLockPause(); + } + + return ReadBunch->BlockSlotAux(ReadSlot++, LastKnownPushBunch); + } + }; + + struct TDefaultParams { + static constexpr bool DeleteItems = true; + using TAux = NObstructiveQueuePrivate::TEmptyAux; + using TBunchBase = NObstructiveQueuePrivate::TEmpty; + static constexpr ui32 BUNCH_SIZE = 251; + }; + + } //namespace NObstructiveQueuePrivate + + DeclareTuneValueParam(TObstructiveQueueBunchSize, ui32, BUNCH_SIZE); + 
DeclareTuneValueParam(TObstructiveQueueDeleteItems, bool, DeleteItems); + DeclareTuneTypeParam(TObstructiveQueueBunchBase, TBunchBase); + DeclareTuneTypeParam(TObstructiveQueueAux, TAux); + template <typename TItem = void, typename... TParams> - class TObstructiveConsumerAuxQueue { - private: - using TTuned = - TTune<NObstructiveQueuePrivate::TDefaultParams, TParams...>; - - using TAux = typename TTuned::TAux; - using TSlot = NObstructiveQueuePrivate::TSlot<TAux>; - using TMsgLink = NObstructiveQueuePrivate::TMsgLink; - using TBunchBase = typename TTuned::TBunchBase; - static constexpr bool DeleteItems = TTuned::DeleteItems; - static constexpr ui32 BUNCH_SIZE = TTuned::BUNCH_SIZE; - - public: - TObstructiveConsumerAuxQueue() - : RBuckets(&WBucket) - { - } - - ~TObstructiveConsumerAuxQueue() { - if (DeleteItems) { - for (;;) { - auto msg = Pop(); - if (msg == nullptr) { - break; - } - TDelete::Destroy(msg); - } - } - } - - void Push(TItem* msg) { - while (!WBucket.Push(reinterpret_cast<TMsgLink>(msg), TAux())) { - } - } - - TItem* Pop() { - return reinterpret_cast<TItem*>(RBuckets.Pop()); - } - - TSlot PopAux() { - return RBuckets.PopAux(); - } - - private: - NObstructiveQueuePrivate::TWriteBucket<TAux, BUNCH_SIZE, TBunchBase> - WBucket; - NObstructiveQueuePrivate::TReadBucket<TAux, BUNCH_SIZE, TBunchBase> - RBuckets; - }; - - template <typename TItem = void, bool DeleteItems = true> - class TObstructiveConsumerQueue + class TObstructiveConsumerAuxQueue { + private: + using TTuned = + TTune<NObstructiveQueuePrivate::TDefaultParams, TParams...>; + + using TAux = typename TTuned::TAux; + using TSlot = NObstructiveQueuePrivate::TSlot<TAux>; + using TMsgLink = NObstructiveQueuePrivate::TMsgLink; + using TBunchBase = typename TTuned::TBunchBase; + static constexpr bool DeleteItems = TTuned::DeleteItems; + static constexpr ui32 BUNCH_SIZE = TTuned::BUNCH_SIZE; + + public: + TObstructiveConsumerAuxQueue() + : RBuckets(&WBucket) + { + } + + ~TObstructiveConsumerAuxQueue() { 
+ if (DeleteItems) { + for (;;) { + auto msg = Pop(); + if (msg == nullptr) { + break; + } + TDelete::Destroy(msg); + } + } + } + + void Push(TItem* msg) { + while (!WBucket.Push(reinterpret_cast<TMsgLink>(msg), TAux())) { + } + } + + TItem* Pop() { + return reinterpret_cast<TItem*>(RBuckets.Pop()); + } + + TSlot PopAux() { + return RBuckets.PopAux(); + } + + private: + NObstructiveQueuePrivate::TWriteBucket<TAux, BUNCH_SIZE, TBunchBase> + WBucket; + NObstructiveQueuePrivate::TReadBucket<TAux, BUNCH_SIZE, TBunchBase> + RBuckets; + }; + + template <typename TItem = void, bool DeleteItems = true> + class TObstructiveConsumerQueue : public TObstructiveConsumerAuxQueue<TItem, TObstructiveQueueDeleteItems<DeleteItems>> { - }; + }; } diff --git a/library/cpp/threading/queue/queue_ut.cpp b/library/cpp/threading/queue/queue_ut.cpp index 80eca147da..8b36437034 100644 --- a/library/cpp/threading/queue/queue_ut.cpp +++ b/library/cpp/threading/queue/queue_ut.cpp @@ -1,242 +1,242 @@ #include <library/cpp/testing/unittest/registar.h> -#include <util/system/thread.h> - -#include "ut_helpers.h" - -typedef void* TMsgLink; - +#include <util/system/thread.h> + +#include "ut_helpers.h" + +typedef void* TMsgLink; + template <typename TQueueType> -class TQueueTestProcs: public TTestBase { -private: +class TQueueTestProcs: public TTestBase { +private: UNIT_TEST_SUITE_DEMANGLE(TQueueTestProcs<TQueueType>); - UNIT_TEST(Threads2_Push1M_Threads1_Pop2M) - UNIT_TEST(Threads4_Push1M_Threads1_Pop4M) - UNIT_TEST(Threads8_RndPush100K_Threads8_Queues) + UNIT_TEST(Threads2_Push1M_Threads1_Pop2M) + UNIT_TEST(Threads4_Push1M_Threads1_Pop4M) + UNIT_TEST(Threads8_RndPush100K_Threads8_Queues) /* - UNIT_TEST(Threads24_RndPush100K_Threads24_Queues) - UNIT_TEST(Threads24_RndPush100K_Threads8_Queues) - UNIT_TEST(Threads24_RndPush100K_Threads4_Queues) -*/ - UNIT_TEST_SUITE_END(); - -public: - void Push1M_Pop1M() { + UNIT_TEST(Threads24_RndPush100K_Threads24_Queues) + 
UNIT_TEST(Threads24_RndPush100K_Threads8_Queues) + UNIT_TEST(Threads24_RndPush100K_Threads4_Queues) +*/ + UNIT_TEST_SUITE_END(); + +public: + void Push1M_Pop1M() { TQueueType queue; - TMsgLink msg = &queue; - - auto pmsg = queue.Pop(); - UNIT_ASSERT_VALUES_EQUAL(pmsg, nullptr); - - for (int i = 0; i < 1000000; ++i) { - queue.Push((char*)msg + i); - } - - for (int i = 0; i < 1000000; ++i) { - auto popped = queue.Pop(); - UNIT_ASSERT_EQUAL((char*)msg + i, popped); - } - - pmsg = queue.Pop(); - UNIT_ASSERT_VALUES_EQUAL(pmsg, nullptr); - } - - void Threads2_Push1M_Threads1_Pop2M() { + TMsgLink msg = &queue; + + auto pmsg = queue.Pop(); + UNIT_ASSERT_VALUES_EQUAL(pmsg, nullptr); + + for (int i = 0; i < 1000000; ++i) { + queue.Push((char*)msg + i); + } + + for (int i = 0; i < 1000000; ++i) { + auto popped = queue.Pop(); + UNIT_ASSERT_EQUAL((char*)msg + i, popped); + } + + pmsg = queue.Pop(); + UNIT_ASSERT_VALUES_EQUAL(pmsg, nullptr); + } + + void Threads2_Push1M_Threads1_Pop2M() { TQueueType queue; - + class TPusherThread: public ISimpleThread { - public: + public: TPusherThread(TQueueType& theQueue, char* start) : Queue(theQueue) - , Arg(start) - { - } - + , Arg(start) + { + } + TQueueType& Queue; - char* Arg; - - void* ThreadProc() override { - for (int i = 0; i < 1000000; ++i) { - Queue.Push(Arg + i); - } - return nullptr; - } - }; - - TPusherThread pusher1(queue, (char*)&queue); - TPusherThread pusher2(queue, (char*)&queue + 2000000); - - pusher1.Start(); - pusher2.Start(); - - for (int i = 0; i < 2000000; ++i) { - while (queue.Pop() == nullptr) { - SpinLockPause(); - } - } - - auto pmsg = queue.Pop(); - UNIT_ASSERT_VALUES_EQUAL(pmsg, nullptr); - } - - void Threads4_Push1M_Threads1_Pop4M() { + char* Arg; + + void* ThreadProc() override { + for (int i = 0; i < 1000000; ++i) { + Queue.Push(Arg + i); + } + return nullptr; + } + }; + + TPusherThread pusher1(queue, (char*)&queue); + TPusherThread pusher2(queue, (char*)&queue + 2000000); + + pusher1.Start(); + 
pusher2.Start(); + + for (int i = 0; i < 2000000; ++i) { + while (queue.Pop() == nullptr) { + SpinLockPause(); + } + } + + auto pmsg = queue.Pop(); + UNIT_ASSERT_VALUES_EQUAL(pmsg, nullptr); + } + + void Threads4_Push1M_Threads1_Pop4M() { TQueueType queue; - + class TPusherThread: public ISimpleThread { - public: + public: TPusherThread(TQueueType& theQueue, char* start) : Queue(theQueue) - , Arg(start) - { - } - + , Arg(start) + { + } + TQueueType& Queue; - char* Arg; - - void* ThreadProc() override { - for (int i = 0; i < 1000000; ++i) { - Queue.Push(Arg + i); - } - return nullptr; - } - }; - - TPusherThread pusher1(queue, (char*)&queue); - TPusherThread pusher2(queue, (char*)&queue + 2000000); - TPusherThread pusher3(queue, (char*)&queue + 4000000); - TPusherThread pusher4(queue, (char*)&queue + 6000000); - - pusher1.Start(); - pusher2.Start(); - pusher3.Start(); - pusher4.Start(); - - for (int i = 0; i < 4000000; ++i) { - while (queue.Pop() == nullptr) { - SpinLockPause(); - } - } - - auto pmsg = queue.Pop(); - UNIT_ASSERT_VALUES_EQUAL(pmsg, nullptr); - } - - template <size_t NUMBER_OF_PUSHERS, size_t NUMBER_OF_QUEUES> - void ManyRndPush100K_ManyQueues() { + char* Arg; + + void* ThreadProc() override { + for (int i = 0; i < 1000000; ++i) { + Queue.Push(Arg + i); + } + return nullptr; + } + }; + + TPusherThread pusher1(queue, (char*)&queue); + TPusherThread pusher2(queue, (char*)&queue + 2000000); + TPusherThread pusher3(queue, (char*)&queue + 4000000); + TPusherThread pusher4(queue, (char*)&queue + 6000000); + + pusher1.Start(); + pusher2.Start(); + pusher3.Start(); + pusher4.Start(); + + for (int i = 0; i < 4000000; ++i) { + while (queue.Pop() == nullptr) { + SpinLockPause(); + } + } + + auto pmsg = queue.Pop(); + UNIT_ASSERT_VALUES_EQUAL(pmsg, nullptr); + } + + template <size_t NUMBER_OF_PUSHERS, size_t NUMBER_OF_QUEUES> + void ManyRndPush100K_ManyQueues() { TQueueType queue[NUMBER_OF_QUEUES]; - + class TPusherThread: public ISimpleThread { - public: + 
public: TPusherThread(TQueueType* queues, char* start) - : Queues(queues) - , Arg(start) - { - } - + : Queues(queues) + , Arg(start) + { + } + TQueueType* Queues; - char* Arg; - - void* ThreadProc() override { - ui64 counters[NUMBER_OF_QUEUES]; - for (size_t i = 0; i < NUMBER_OF_QUEUES; ++i) { - counters[i] = 0; - } - - for (int i = 0; i < 100000; ++i) { - size_t rnd = GetCycleCount() % NUMBER_OF_QUEUES; - int cookie = counters[rnd]++; - Queues[rnd].Push(Arg + cookie); - } - - for (size_t i = 0; i < NUMBER_OF_QUEUES; ++i) { - Queues[i].Push((void*)2ULL); - } - - return nullptr; - } - }; - + char* Arg; + + void* ThreadProc() override { + ui64 counters[NUMBER_OF_QUEUES]; + for (size_t i = 0; i < NUMBER_OF_QUEUES; ++i) { + counters[i] = 0; + } + + for (int i = 0; i < 100000; ++i) { + size_t rnd = GetCycleCount() % NUMBER_OF_QUEUES; + int cookie = counters[rnd]++; + Queues[rnd].Push(Arg + cookie); + } + + for (size_t i = 0; i < NUMBER_OF_QUEUES; ++i) { + Queues[i].Push((void*)2ULL); + } + + return nullptr; + } + }; + class TPopperThread: public ISimpleThread { - public: + public: TPopperThread(TQueueType* theQueue, char* base) : Queue(theQueue) - , Base(base) - { - } - + , Base(base) + { + } + TQueueType* Queue; - char* Base; - - void* ThreadProc() override { - ui64 counters[NUMBER_OF_PUSHERS]; - for (size_t i = 0; i < NUMBER_OF_PUSHERS; ++i) { - counters[i] = 0; - } - - for (size_t fin = 0; fin < NUMBER_OF_PUSHERS;) { - auto msg = Queue->Pop(); - if (msg == nullptr) { - SpinLockPause(); - continue; - } - if (msg == (void*)2ULL) { - ++fin; - continue; - } - ui64 shift = (char*)msg - Base; - auto pusherNum = shift / 200000000ULL; - auto msgNum = shift % 200000000ULL; - - UNIT_ASSERT_EQUAL(counters[pusherNum], msgNum); - ++counters[pusherNum]; - } - - auto pmsg = Queue->Pop(); - UNIT_ASSERT_VALUES_EQUAL(pmsg, nullptr); - - return nullptr; - } - }; - + char* Base; + + void* ThreadProc() override { + ui64 counters[NUMBER_OF_PUSHERS]; + for (size_t i = 0; i < 
NUMBER_OF_PUSHERS; ++i) { + counters[i] = 0; + } + + for (size_t fin = 0; fin < NUMBER_OF_PUSHERS;) { + auto msg = Queue->Pop(); + if (msg == nullptr) { + SpinLockPause(); + continue; + } + if (msg == (void*)2ULL) { + ++fin; + continue; + } + ui64 shift = (char*)msg - Base; + auto pusherNum = shift / 200000000ULL; + auto msgNum = shift % 200000000ULL; + + UNIT_ASSERT_EQUAL(counters[pusherNum], msgNum); + ++counters[pusherNum]; + } + + auto pmsg = Queue->Pop(); + UNIT_ASSERT_VALUES_EQUAL(pmsg, nullptr); + + return nullptr; + } + }; + TVector<TAutoPtr<TPopperThread>> poppers; TVector<TAutoPtr<TPusherThread>> pushers; - - for (size_t i = 0; i < NUMBER_OF_QUEUES; ++i) { - poppers.emplace_back(new TPopperThread(&queue[i], (char*)&queue)); - poppers.back()->Start(); - } - - for (size_t i = 0; i < NUMBER_OF_PUSHERS; ++i) { - pushers.emplace_back( - new TPusherThread(queue, (char*)&queue + 200000000ULL * i)); - pushers.back()->Start(); - } - - for (size_t i = 0; i < NUMBER_OF_QUEUES; ++i) { - poppers[i]->Join(); - } - - for (size_t i = 0; i < NUMBER_OF_PUSHERS; ++i) { - pushers[i]->Join(); - } - } - - void Threads8_RndPush100K_Threads8_Queues() { - ManyRndPush100K_ManyQueues<8, 8>(); - } - - /* - void Threads24_RndPush100K_Threads24_Queues() { - ManyRndPush100K_ManyQueues<24, 24>(); - } - - void Threads24_RndPush100K_Threads8_Queues() { - ManyRndPush100K_ManyQueues<24, 8>(); - } - - void Threads24_RndPush100K_Threads4_Queues() { - ManyRndPush100K_ManyQueues<24, 4>(); - } - */ -}; - -REGISTER_TESTS_FOR_ALL_ORDERED_QUEUES(TQueueTestProcs); + + for (size_t i = 0; i < NUMBER_OF_QUEUES; ++i) { + poppers.emplace_back(new TPopperThread(&queue[i], (char*)&queue)); + poppers.back()->Start(); + } + + for (size_t i = 0; i < NUMBER_OF_PUSHERS; ++i) { + pushers.emplace_back( + new TPusherThread(queue, (char*)&queue + 200000000ULL * i)); + pushers.back()->Start(); + } + + for (size_t i = 0; i < NUMBER_OF_QUEUES; ++i) { + poppers[i]->Join(); + } + + for (size_t i = 0; i < 
NUMBER_OF_PUSHERS; ++i) { + pushers[i]->Join(); + } + } + + void Threads8_RndPush100K_Threads8_Queues() { + ManyRndPush100K_ManyQueues<8, 8>(); + } + + /* + void Threads24_RndPush100K_Threads24_Queues() { + ManyRndPush100K_ManyQueues<24, 24>(); + } + + void Threads24_RndPush100K_Threads8_Queues() { + ManyRndPush100K_ManyQueues<24, 8>(); + } + + void Threads24_RndPush100K_Threads4_Queues() { + ManyRndPush100K_ManyQueues<24, 4>(); + } + */ +}; + +REGISTER_TESTS_FOR_ALL_ORDERED_QUEUES(TQueueTestProcs); diff --git a/library/cpp/threading/queue/tune.h b/library/cpp/threading/queue/tune.h index 50fc3dc17c..43ad5efe3e 100644 --- a/library/cpp/threading/queue/tune.h +++ b/library/cpp/threading/queue/tune.h @@ -1,101 +1,101 @@ -#pragma once - -/* - Motivation: consider you have a template class with many parameters - with default associations - - template <typename A = TDefA, - typename B = TDefB, - typename C = TDefC, - typename D = TDefD> - class TExample { - }; - - consider you would like to provide easy to use interface to tune all - these parameters in position independed manner, - In that case TTune would be helpful for you. - - How to use: - First step: declare a struct with all default associations - - struct TDefaultTune { - using TStructA = TDefA; - using TStructB = TDefB; - using TStructC = TDefC; - using TStructD = TDefD; - }; - - Second step: declare helper names visible to a user - - DeclareTuneTypeParam(TTuneParamA, TStructA); - DeclareTuneTypeParam(TTuneParamB, TStructB); - DeclareTuneTypeParam(TTuneParamC, TStructC); - DeclareTuneTypeParam(TTuneParamD, TStructD); - - Third step: declare TExample this way: - - template <typename...TParams> - class TExample { - using TMyParams = TTune<TDefaultTune, TParams...>; - - using TActualA = TMyParams::TStructA; - using TActualB = TMyParams::TStructB; - ... - }; - - TTune<TDefaultTune, TParams...> is a struct with the default parameteres - taken from TDefaultTune and overridden from "TParams...". 
- - for example: "TTune<TDefaultTune, TTuneParamC<TUserClass>>" - will be virtually the same as: - - struct TTunedClass { - using TStructA = TDefA; - using TStructB = TDefB; - using TStructC = TUserClass; - using TStructD = TDefD; - }; - - From now on you can tune your TExample in the following manner: - - using TCustomClass = - TExample <TTuneParamA<TUserStruct1>, TTuneParamD<TUserStruct2>>; - - You can also tweak constant expressions in your TDefaultTune. - Consider you have: - - struct TDefaultTune { - static constexpr ui32 MySize = 42; - }; - - declare an interface to modify the parameter this way: - - DeclareTuneValueParam(TStructSize, ui32, MySize); - - and tweak your class: - - using TTwiceBigger = TExample<TStructSize<84>>; - - */ - -#define DeclareTuneTypeParam(TParamName, InternalName) \ - template <typename TNewType> \ - struct TParamName { \ - template <typename TBase> \ - struct TApply: public TBase { \ - using InternalName = TNewType; \ - }; \ - } - -#define DeclareTuneValueParam(TParamName, TValueType, InternalName) \ - template <TValueType NewValue> \ - struct TParamName { \ - template <typename TBase> \ - struct TApply: public TBase { \ - static constexpr TValueType InternalName = NewValue; \ - }; \ - } - +#pragma once + +/* + Motivation: consider you have a template class with many parameters + with default associations + + template <typename A = TDefA, + typename B = TDefB, + typename C = TDefC, + typename D = TDefD> + class TExample { + }; + + consider you would like to provide easy to use interface to tune all + these parameters in position independed manner, + In that case TTune would be helpful for you. 
+ + How to use: + First step: declare a struct with all default associations + + struct TDefaultTune { + using TStructA = TDefA; + using TStructB = TDefB; + using TStructC = TDefC; + using TStructD = TDefD; + }; + + Second step: declare helper names visible to a user + + DeclareTuneTypeParam(TTuneParamA, TStructA); + DeclareTuneTypeParam(TTuneParamB, TStructB); + DeclareTuneTypeParam(TTuneParamC, TStructC); + DeclareTuneTypeParam(TTuneParamD, TStructD); + + Third step: declare TExample this way: + + template <typename...TParams> + class TExample { + using TMyParams = TTune<TDefaultTune, TParams...>; + + using TActualA = TMyParams::TStructA; + using TActualB = TMyParams::TStructB; + ... + }; + + TTune<TDefaultTune, TParams...> is a struct with the default parameteres + taken from TDefaultTune and overridden from "TParams...". + + for example: "TTune<TDefaultTune, TTuneParamC<TUserClass>>" + will be virtually the same as: + + struct TTunedClass { + using TStructA = TDefA; + using TStructB = TDefB; + using TStructC = TUserClass; + using TStructD = TDefD; + }; + + From now on you can tune your TExample in the following manner: + + using TCustomClass = + TExample <TTuneParamA<TUserStruct1>, TTuneParamD<TUserStruct2>>; + + You can also tweak constant expressions in your TDefaultTune. 
+ Consider you have: + + struct TDefaultTune { + static constexpr ui32 MySize = 42; + }; + + declare an interface to modify the parameter this way: + + DeclareTuneValueParam(TStructSize, ui32, MySize); + + and tweak your class: + + using TTwiceBigger = TExample<TStructSize<84>>; + + */ + +#define DeclareTuneTypeParam(TParamName, InternalName) \ + template <typename TNewType> \ + struct TParamName { \ + template <typename TBase> \ + struct TApply: public TBase { \ + using InternalName = TNewType; \ + }; \ + } + +#define DeclareTuneValueParam(TParamName, TValueType, InternalName) \ + template <TValueType NewValue> \ + struct TParamName { \ + template <typename TBase> \ + struct TApply: public TBase { \ + static constexpr TValueType InternalName = NewValue; \ + }; \ + } + #define DeclareTuneContainer(TParamName, InternalName) \ template <template <typename, typename...> class TNewContainer> \ struct TParamName { \ @@ -104,22 +104,22 @@ template <typename TElem, typename... TRest> \ using InternalName = TNewContainer<TElem, TRest...>; \ }; \ - } - -namespace NTunePrivate { - template <typename TBase, typename... TParams> - struct TFold; - - template <typename TBase> - struct TFold<TBase>: public TBase { - }; - - template <typename TBase, typename TFirstArg, typename... TRest> - struct TFold<TBase, TFirstArg, TRest...> - : public TFold<typename TFirstArg::template TApply<TBase>, TRest...> { - }; -} - -template <typename TDefault, typename... TParams> -struct TTune: public NTunePrivate::TFold<TDefault, TParams...> { -}; + } + +namespace NTunePrivate { + template <typename TBase, typename... TParams> + struct TFold; + + template <typename TBase> + struct TFold<TBase>: public TBase { + }; + + template <typename TBase, typename TFirstArg, typename... TRest> + struct TFold<TBase, TFirstArg, TRest...> + : public TFold<typename TFirstArg::template TApply<TBase>, TRest...> { + }; +} + +template <typename TDefault, typename... 
TParams> +struct TTune: public NTunePrivate::TFold<TDefault, TParams...> { +}; diff --git a/library/cpp/threading/queue/tune_ut.cpp b/library/cpp/threading/queue/tune_ut.cpp index 7e980d3e27..64bc8fd427 100644 --- a/library/cpp/threading/queue/tune_ut.cpp +++ b/library/cpp/threading/queue/tune_ut.cpp @@ -1,118 +1,118 @@ #include <library/cpp/testing/unittest/registar.h> -#include "tune.h" - -struct TDefaultStructA { -}; - -struct TDefaultStructB { -}; - -struct TDefaults { - using TStructA = TDefaultStructA; - using TStructB = TDefaultStructB; - static constexpr ui32 Param1 = 42; - static constexpr ui32 Param2 = 42; -}; - -DeclareTuneTypeParam(TweakStructA, TStructA); -DeclareTuneTypeParam(TweakStructB, TStructB); -DeclareTuneValueParam(TweakParam1, ui32, Param1); -DeclareTuneValueParam(TweakParam2, ui32, Param2); - +#include "tune.h" + +struct TDefaultStructA { +}; + +struct TDefaultStructB { +}; + +struct TDefaults { + using TStructA = TDefaultStructA; + using TStructB = TDefaultStructB; + static constexpr ui32 Param1 = 42; + static constexpr ui32 Param2 = 42; +}; + +DeclareTuneTypeParam(TweakStructA, TStructA); +DeclareTuneTypeParam(TweakStructB, TStructB); +DeclareTuneValueParam(TweakParam1, ui32, Param1); +DeclareTuneValueParam(TweakParam2, ui32, Param2); + Y_UNIT_TEST_SUITE(TestTuning) { Y_UNIT_TEST(Defaults) { - using TTuned = TTune<TDefaults>; - using TunedA = TTuned::TStructA; - using TunedB = TTuned::TStructB; - auto sameA = std::is_same<TDefaultStructA, TunedA>::value; - auto sameB = std::is_same<TDefaultStructB, TunedB>::value; - auto param1 = TTuned::Param1; - auto param2 = TTuned::Param2; - - UNIT_ASSERT(sameA); - UNIT_ASSERT(sameB); - UNIT_ASSERT_EQUAL(param1, 42); - UNIT_ASSERT_EQUAL(param2, 42); - } - + using TTuned = TTune<TDefaults>; + using TunedA = TTuned::TStructA; + using TunedB = TTuned::TStructB; + auto sameA = std::is_same<TDefaultStructA, TunedA>::value; + auto sameB = std::is_same<TDefaultStructB, TunedB>::value; + auto param1 = 
TTuned::Param1; + auto param2 = TTuned::Param2; + + UNIT_ASSERT(sameA); + UNIT_ASSERT(sameB); + UNIT_ASSERT_EQUAL(param1, 42); + UNIT_ASSERT_EQUAL(param2, 42); + } + Y_UNIT_TEST(TuneStructA) { - struct TMyStruct { - }; - - using TTuned = TTune<TDefaults, TweakStructA<TMyStruct>>; - - using TunedA = TTuned::TStructA; - using TunedB = TTuned::TStructB; - //auto sameA = std::is_same<TDefaultStructA, TunedA>::value; - auto sameB = std::is_same<TDefaultStructB, TunedB>::value; - auto param1 = TTuned::Param1; - auto param2 = TTuned::Param2; - - auto sameA = std::is_same<TMyStruct, TunedA>::value; - - UNIT_ASSERT(sameA); - UNIT_ASSERT(sameB); - UNIT_ASSERT_EQUAL(param1, 42); - UNIT_ASSERT_EQUAL(param2, 42); - } - + struct TMyStruct { + }; + + using TTuned = TTune<TDefaults, TweakStructA<TMyStruct>>; + + using TunedA = TTuned::TStructA; + using TunedB = TTuned::TStructB; + //auto sameA = std::is_same<TDefaultStructA, TunedA>::value; + auto sameB = std::is_same<TDefaultStructB, TunedB>::value; + auto param1 = TTuned::Param1; + auto param2 = TTuned::Param2; + + auto sameA = std::is_same<TMyStruct, TunedA>::value; + + UNIT_ASSERT(sameA); + UNIT_ASSERT(sameB); + UNIT_ASSERT_EQUAL(param1, 42); + UNIT_ASSERT_EQUAL(param2, 42); + } + Y_UNIT_TEST(TuneParam1) { - using TTuned = TTune<TDefaults, TweakParam1<24>>; - - using TunedA = TTuned::TStructA; - using TunedB = TTuned::TStructB; - auto sameA = std::is_same<TDefaultStructA, TunedA>::value; - auto sameB = std::is_same<TDefaultStructB, TunedB>::value; - auto param1 = TTuned::Param1; - auto param2 = TTuned::Param2; - - UNIT_ASSERT(sameA); - UNIT_ASSERT(sameB); - UNIT_ASSERT_EQUAL(param1, 24); - UNIT_ASSERT_EQUAL(param2, 42); - } - + using TTuned = TTune<TDefaults, TweakParam1<24>>; + + using TunedA = TTuned::TStructA; + using TunedB = TTuned::TStructB; + auto sameA = std::is_same<TDefaultStructA, TunedA>::value; + auto sameB = std::is_same<TDefaultStructB, TunedB>::value; + auto param1 = TTuned::Param1; + auto param2 = 
TTuned::Param2; + + UNIT_ASSERT(sameA); + UNIT_ASSERT(sameB); + UNIT_ASSERT_EQUAL(param1, 24); + UNIT_ASSERT_EQUAL(param2, 42); + } + Y_UNIT_TEST(TuneStructAAndParam1) { - struct TMyStruct { - }; - - using TTuned = - TTune<TDefaults, TweakStructA<TMyStruct>, TweakParam1<24>>; - - using TunedA = TTuned::TStructA; - using TunedB = TTuned::TStructB; - //auto sameA = std::is_same<TDefaultStructA, TunedA>::value; - auto sameB = std::is_same<TDefaultStructB, TunedB>::value; - auto param1 = TTuned::Param1; - auto param2 = TTuned::Param2; - - auto sameA = std::is_same<TMyStruct, TunedA>::value; - - UNIT_ASSERT(sameA); - UNIT_ASSERT(sameB); - UNIT_ASSERT_EQUAL(param1, 24); - UNIT_ASSERT_EQUAL(param2, 42); - } - + struct TMyStruct { + }; + + using TTuned = + TTune<TDefaults, TweakStructA<TMyStruct>, TweakParam1<24>>; + + using TunedA = TTuned::TStructA; + using TunedB = TTuned::TStructB; + //auto sameA = std::is_same<TDefaultStructA, TunedA>::value; + auto sameB = std::is_same<TDefaultStructB, TunedB>::value; + auto param1 = TTuned::Param1; + auto param2 = TTuned::Param2; + + auto sameA = std::is_same<TMyStruct, TunedA>::value; + + UNIT_ASSERT(sameA); + UNIT_ASSERT(sameB); + UNIT_ASSERT_EQUAL(param1, 24); + UNIT_ASSERT_EQUAL(param2, 42); + } + Y_UNIT_TEST(TuneParam1AndStructA) { - struct TMyStruct { - }; - - using TTuned = - TTune<TDefaults, TweakParam1<24>, TweakStructA<TMyStruct>>; - - using TunedA = TTuned::TStructA; - using TunedB = TTuned::TStructB; - //auto sameA = std::is_same<TDefaultStructA, TunedA>::value; - auto sameB = std::is_same<TDefaultStructB, TunedB>::value; - auto param1 = TTuned::Param1; - auto param2 = TTuned::Param2; - - auto sameA = std::is_same<TMyStruct, TunedA>::value; - - UNIT_ASSERT(sameA); - UNIT_ASSERT(sameB); - UNIT_ASSERT_EQUAL(param1, 24); - UNIT_ASSERT_EQUAL(param2, 42); - } -} + struct TMyStruct { + }; + + using TTuned = + TTune<TDefaults, TweakParam1<24>, TweakStructA<TMyStruct>>; + + using TunedA = TTuned::TStructA; + using TunedB = 
TTuned::TStructB; + //auto sameA = std::is_same<TDefaultStructA, TunedA>::value; + auto sameB = std::is_same<TDefaultStructB, TunedB>::value; + auto param1 = TTuned::Param1; + auto param2 = TTuned::Param2; + + auto sameA = std::is_same<TMyStruct, TunedA>::value; + + UNIT_ASSERT(sameA); + UNIT_ASSERT(sameB); + UNIT_ASSERT_EQUAL(param1, 24); + UNIT_ASSERT_EQUAL(param2, 42); + } +} diff --git a/library/cpp/threading/queue/unordered_ut.cpp b/library/cpp/threading/queue/unordered_ut.cpp index a43b7f520e..2018538bf7 100644 --- a/library/cpp/threading/queue/unordered_ut.cpp +++ b/library/cpp/threading/queue/unordered_ut.cpp @@ -1,154 +1,154 @@ #include <library/cpp/testing/unittest/registar.h> -#include <util/system/thread.h> -#include <algorithm> -#include <util/generic/vector.h> -#include <util/random/fast.h> - -#include "ut_helpers.h" - +#include <util/system/thread.h> +#include <algorithm> +#include <util/generic/vector.h> +#include <util/random/fast.h> + +#include "ut_helpers.h" + template <typename TQueueType> -class TTestUnorderedQueue: public TTestBase { -private: - using TLink = TIntrusiveLink; - +class TTestUnorderedQueue: public TTestBase { +private: + using TLink = TIntrusiveLink; + UNIT_TEST_SUITE_DEMANGLE(TTestUnorderedQueue<TQueueType>); - UNIT_TEST(Push1M_Pop1M_Unordered) - UNIT_TEST_SUITE_END(); - -public: - void Push1M_Pop1M_Unordered() { - constexpr int REPEAT = 1000000; + UNIT_TEST(Push1M_Pop1M_Unordered) + UNIT_TEST_SUITE_END(); + +public: + void Push1M_Pop1M_Unordered() { + constexpr int REPEAT = 1000000; TQueueType queue; - TLink msg[REPEAT]; - - auto pmsg = queue.Pop(); - UNIT_ASSERT_VALUES_EQUAL(pmsg, nullptr); - - for (int i = 0; i < REPEAT; ++i) { - queue.Push(&msg[i]); - } - + TLink msg[REPEAT]; + + auto pmsg = queue.Pop(); + UNIT_ASSERT_VALUES_EQUAL(pmsg, nullptr); + + for (int i = 0; i < REPEAT; ++i) { + queue.Push(&msg[i]); + } + TVector<TLink*> popped; - popped.reserve(REPEAT); - for (int i = 0; i < REPEAT; ++i) { - 
popped.push_back((TLink*)queue.Pop()); - } - - pmsg = queue.Pop(); - UNIT_ASSERT_VALUES_EQUAL(pmsg, nullptr); - - std::sort(popped.begin(), popped.end()); - for (int i = 0; i < REPEAT; ++i) { - UNIT_ASSERT_VALUES_EQUAL(&msg[i], popped[i]); - } - } -}; - + popped.reserve(REPEAT); + for (int i = 0; i < REPEAT; ++i) { + popped.push_back((TLink*)queue.Pop()); + } + + pmsg = queue.Pop(); + UNIT_ASSERT_VALUES_EQUAL(pmsg, nullptr); + + std::sort(popped.begin(), popped.end()); + for (int i = 0; i < REPEAT; ++i) { + UNIT_ASSERT_VALUES_EQUAL(&msg[i], popped[i]); + } + } +}; + template <typename TQueueType> -class TTestWeakQueue: public TTestBase { -private: +class TTestWeakQueue: public TTestBase { +private: UNIT_TEST_SUITE_DEMANGLE(TTestWeakQueue<TQueueType>); - UNIT_TEST(Threads8_Rnd_Exchange) - UNIT_TEST_SUITE_END(); - -public: - template <ui16 COUNT = 48, ui32 MSG_COUNT = 10000> - void ManyThreadsRndExchange() { + UNIT_TEST(Threads8_Rnd_Exchange) + UNIT_TEST_SUITE_END(); + +public: + template <ui16 COUNT = 48, ui32 MSG_COUNT = 10000> + void ManyThreadsRndExchange() { TQueueType queues[COUNT]; - + class TWorker: public ISimpleThread { - public: - TWorker( + public: + TWorker( TQueueType* queues_, ui16 mine, TAtomic* pushDone) - : Queues(queues_) - , MineQueue(mine) - , PushDone(pushDone) - { - } - + : Queues(queues_) + , MineQueue(mine) + , PushDone(pushDone) + { + } + TQueueType* Queues; - ui16 MineQueue; + ui16 MineQueue; TVector<uintptr_t> Received; - TAtomic* PushDone; - - void* ThreadProc() override { - TReallyFastRng32 rng(GetCycleCount()); - Received.reserve(MSG_COUNT * 2); - - for (ui32 loop = 1; loop <= MSG_COUNT; ++loop) { - for (;;) { - auto msg = Queues[MineQueue].Pop(); - if (msg == nullptr) { - break; - } - - Received.push_back((uintptr_t)msg); - } - - ui16 rnd = rng.GenRand64() % COUNT; - ui64 msg = ((ui64)MineQueue << 32) + loop; - while (!Queues[rnd].Push((void*)msg)) { - } - } - - AtomicIncrement(*PushDone); - - for (;;) { - bool isItLast = 
AtomicGet(*PushDone) == COUNT; - auto msg = Queues[MineQueue].Pop(); - if (msg != nullptr) { - Received.push_back((uintptr_t)msg); - } else { - if (isItLast) { - break; - } - SpinLockPause(); - } - } - - for (ui64 last = 0;;) { - auto msg = Queues[MineQueue].UnsafeScanningPop(&last); - if (msg == nullptr) { - break; - } - Received.push_back((uintptr_t)msg); - } - - return nullptr; - } - }; - + TAtomic* PushDone; + + void* ThreadProc() override { + TReallyFastRng32 rng(GetCycleCount()); + Received.reserve(MSG_COUNT * 2); + + for (ui32 loop = 1; loop <= MSG_COUNT; ++loop) { + for (;;) { + auto msg = Queues[MineQueue].Pop(); + if (msg == nullptr) { + break; + } + + Received.push_back((uintptr_t)msg); + } + + ui16 rnd = rng.GenRand64() % COUNT; + ui64 msg = ((ui64)MineQueue << 32) + loop; + while (!Queues[rnd].Push((void*)msg)) { + } + } + + AtomicIncrement(*PushDone); + + for (;;) { + bool isItLast = AtomicGet(*PushDone) == COUNT; + auto msg = Queues[MineQueue].Pop(); + if (msg != nullptr) { + Received.push_back((uintptr_t)msg); + } else { + if (isItLast) { + break; + } + SpinLockPause(); + } + } + + for (ui64 last = 0;;) { + auto msg = Queues[MineQueue].UnsafeScanningPop(&last); + if (msg == nullptr) { + break; + } + Received.push_back((uintptr_t)msg); + } + + return nullptr; + } + }; + TVector<TAutoPtr<TWorker>> workers; - TAtomic pushDone = 0; - - for (ui32 i = 0; i < COUNT; ++i) { - workers.emplace_back(new TWorker(&queues[0], i, &pushDone)); - workers.back()->Start(); - } - + TAtomic pushDone = 0; + + for (ui32 i = 0; i < COUNT; ++i) { + workers.emplace_back(new TWorker(&queues[0], i, &pushDone)); + workers.back()->Start(); + } + TVector<uintptr_t> all; - for (ui32 i = 0; i < COUNT; ++i) { - workers[i]->Join(); - all.insert(all.begin(), + for (ui32 i = 0; i < COUNT; ++i) { + workers[i]->Join(); + all.insert(all.begin(), workers[i]->Received.begin(), workers[i]->Received.end()); - } - - std::sort(all.begin(), all.end()); - auto iter = all.begin(); - for (ui32 i = 
0; i < COUNT; ++i) { - for (ui32 k = 1; k <= MSG_COUNT; ++k) { - UNIT_ASSERT_VALUES_EQUAL(((ui64)i << 32) + k, *iter); - ++iter; - } - } - } - - void Threads8_Rnd_Exchange() { - ManyThreadsRndExchange<8>(); - } -}; - -REGISTER_TESTS_FOR_ALL_UNORDERED_QUEUES(TTestUnorderedQueue); -UNIT_TEST_SUITE_REGISTRATION(TTestWeakQueue<TMPMCUnorderedRing>); + } + + std::sort(all.begin(), all.end()); + auto iter = all.begin(); + for (ui32 i = 0; i < COUNT; ++i) { + for (ui32 k = 1; k <= MSG_COUNT; ++k) { + UNIT_ASSERT_VALUES_EQUAL(((ui64)i << 32) + k, *iter); + ++iter; + } + } + } + + void Threads8_Rnd_Exchange() { + ManyThreadsRndExchange<8>(); + } +}; + +REGISTER_TESTS_FOR_ALL_UNORDERED_QUEUES(TTestUnorderedQueue); +UNIT_TEST_SUITE_REGISTRATION(TTestWeakQueue<TMPMCUnorderedRing>); diff --git a/library/cpp/threading/queue/ut/ya.make b/library/cpp/threading/queue/ut/ya.make index 8883d9bf69..dda204155e 100644 --- a/library/cpp/threading/queue/ut/ya.make +++ b/library/cpp/threading/queue/ut/ya.make @@ -1,16 +1,16 @@ UNITTEST_FOR(library/cpp/threading/queue) - + OWNER(agri) - -ALLOCATOR(B) - -SRCS( - basic_ut.cpp - queue_ut.cpp - tune_ut.cpp - unordered_ut.cpp - ut_helpers.cpp - ut_helpers.h -) - -END() + +ALLOCATOR(B) + +SRCS( + basic_ut.cpp + queue_ut.cpp + tune_ut.cpp + unordered_ut.cpp + ut_helpers.cpp + ut_helpers.h +) + +END() diff --git a/library/cpp/threading/queue/ut_helpers.cpp b/library/cpp/threading/queue/ut_helpers.cpp index aa3a831441..342aa125a0 100644 --- a/library/cpp/threading/queue/ut_helpers.cpp +++ b/library/cpp/threading/queue/ut_helpers.cpp @@ -1 +1 @@ -#include "ut_helpers.h" +#include "ut_helpers.h" diff --git a/library/cpp/threading/queue/ut_helpers.h b/library/cpp/threading/queue/ut_helpers.h index 2756b52601..c720366593 100644 --- a/library/cpp/threading/queue/ut_helpers.h +++ b/library/cpp/threading/queue/ut_helpers.h @@ -1,40 +1,40 @@ -#pragma once - -#include "mpsc_read_as_filled.h" -#include "mpsc_htswap.h" -#include "mpsc_vinfarr_obstructive.h" 
-#include "mpsc_intrusive_unordered.h" -#include "mpmc_unordered_ring.h" - -struct TBasicHTSwap: public NThreading::THTSwapQueue<> { -}; - -struct TBasicReadAsFilled: public NThreading::TReadAsFilledQueue<> { -}; - -struct TBasicObstructiveConsumer +#pragma once + +#include "mpsc_read_as_filled.h" +#include "mpsc_htswap.h" +#include "mpsc_vinfarr_obstructive.h" +#include "mpsc_intrusive_unordered.h" +#include "mpmc_unordered_ring.h" + +struct TBasicHTSwap: public NThreading::THTSwapQueue<> { +}; + +struct TBasicReadAsFilled: public NThreading::TReadAsFilledQueue<> { +}; + +struct TBasicObstructiveConsumer : public NThreading::TObstructiveConsumerQueue<> { -}; - -struct TBasicMPSCIntrusiveUnordered +}; + +struct TBasicMPSCIntrusiveUnordered : public NThreading::TMPSCIntrusiveUnordered { -}; - -struct TIntrusiveLink: public NThreading::TIntrusiveNode { -}; - -struct TMPMCUnorderedRing: public NThreading::TMPMCUnorderedRing { - TMPMCUnorderedRing() - : NThreading::TMPMCUnorderedRing(10000000) - { - } -}; - +}; + +struct TIntrusiveLink: public NThreading::TIntrusiveNode { +}; + +struct TMPMCUnorderedRing: public NThreading::TMPMCUnorderedRing { + TMPMCUnorderedRing() + : NThreading::TMPMCUnorderedRing(10000000) + { + } +}; + #define REGISTER_TESTS_FOR_ALL_ORDERED_QUEUES(TestTemplate) \ UNIT_TEST_SUITE_REGISTRATION(TestTemplate<TBasicHTSwap>); \ UNIT_TEST_SUITE_REGISTRATION(TestTemplate<TBasicReadAsFilled>); \ - UNIT_TEST_SUITE_REGISTRATION(TestTemplate<TBasicObstructiveConsumer>) - + UNIT_TEST_SUITE_REGISTRATION(TestTemplate<TBasicObstructiveConsumer>) + #define REGISTER_TESTS_FOR_ALL_UNORDERED_QUEUES(TestTemplate) \ - UNIT_TEST_SUITE_REGISTRATION(TestTemplate<TBasicMPSCIntrusiveUnordered>); \ - UNIT_TEST_SUITE_REGISTRATION(TestTemplate<TMPMCUnorderedRing>); + UNIT_TEST_SUITE_REGISTRATION(TestTemplate<TBasicMPSCIntrusiveUnordered>); \ + UNIT_TEST_SUITE_REGISTRATION(TestTemplate<TMPMCUnorderedRing>); diff --git a/library/cpp/threading/queue/ya.make 
b/library/cpp/threading/queue/ya.make index 6570b38ce5..3a11eb2d92 100644 --- a/library/cpp/threading/queue/ya.make +++ b/library/cpp/threading/queue/ya.make @@ -1,18 +1,18 @@ -LIBRARY() - -OWNER(agri) - -SRCS( - mpmc_unordered_ring.cpp - mpmc_unordered_ring.h - mpsc_htswap.cpp - mpsc_htswap.h - mpsc_intrusive_unordered.cpp - mpsc_intrusive_unordered.h - mpsc_read_as_filled.cpp - mpsc_read_as_filled.h - mpsc_vinfarr_obstructive.cpp - mpsc_vinfarr_obstructive.h -) - -END() +LIBRARY() + +OWNER(agri) + +SRCS( + mpmc_unordered_ring.cpp + mpmc_unordered_ring.h + mpsc_htswap.cpp + mpsc_htswap.h + mpsc_intrusive_unordered.cpp + mpsc_intrusive_unordered.h + mpsc_read_as_filled.cpp + mpsc_read_as_filled.h + mpsc_vinfarr_obstructive.cpp + mpsc_vinfarr_obstructive.h +) + +END() |