diff options
| author | agri <[email protected]> | 2022-02-10 16:48:12 +0300 | 
|---|---|---|
| committer | Daniil Cherednik <[email protected]> | 2022-02-10 16:48:12 +0300 | 
| commit | d3530b2692e400bd4d29bd4f07cafaee139164e7 (patch) | |
| tree | b7ae636a74490e649a2ed0fdd5361f1bec83b9f9 /library/cpp | |
| parent | 0f4c5d1e8c0672bf0a1f2f2d8acac5ba24772435 (diff) | |
Restoring authorship annotation for <[email protected]>. Commit 1 of 2.
Diffstat (limited to 'library/cpp')
83 files changed, 5689 insertions, 5689 deletions
| diff --git a/library/cpp/actors/core/actor_bootstrapped.h b/library/cpp/actors/core/actor_bootstrapped.h index a37887c9398..e15bb86ce64 100644 --- a/library/cpp/actors/core/actor_bootstrapped.h +++ b/library/cpp/actors/core/actor_bootstrapped.h @@ -28,8 +28,8 @@ namespace NActors {              } else {                  static_assert(dependent_false<TDerived>::value, "No correct Bootstrap() signature");              } -        } - +        }  +           TActorBootstrapped()              : TActor<TDerived>(&TDerived::StateBootstrap)          {} diff --git a/library/cpp/actors/core/actorsystem.h b/library/cpp/actors/core/actorsystem.h index 40499d7586f..58d360edcce 100644 --- a/library/cpp/actors/core/actorsystem.h +++ b/library/cpp/actors/core/actorsystem.h @@ -129,7 +129,7 @@ namespace NActors {          virtual void SetRealTimeMode() const {}      }; - +       // could be proxy to in-pool schedulers (for NUMA-aware executors)      class ISchedulerThread : TNonCopyable {      public: @@ -352,7 +352,7 @@ namespace NActors {          NLog::TSettings* LoggerSettings() const {              return LoggerSettings0.Get();          } - +           void GetPoolStats(ui32 poolId, TExecutorPoolStats& poolStats, TVector<TExecutorThreadStats>& statsCopy) const;          void DeferPreStop(std::function<void()> fn) { @@ -360,8 +360,8 @@ namespace NActors {          }          /* This is the base for memory profiling tags. -       System sets memory profiling tag for debug version of lfalloc. -       The tag is set as "base_tag + actor_activity_type". */ +       System sets memory profiling tag for debug version of lfalloc.  +       The tag is set as "base_tag + actor_activity_type". 
*/           static ui32 MemProfActivityBase;      };  } diff --git a/library/cpp/actors/core/event.cpp b/library/cpp/actors/core/event.cpp index 33f8ce2aaf3..1c05ffc3fef 100644 --- a/library/cpp/actors/core/event.cpp +++ b/library/cpp/actors/core/event.cpp @@ -1,7 +1,7 @@  #include "event.h" -#include "event_pb.h" - -namespace NActors { +#include "event_pb.h"  +  +namespace NActors {       const TScopeId TScopeId::LocallyGenerated{          Max<ui64>(), Max<ui64>() @@ -22,8 +22,8 @@ namespace NActors {              return chainBuf;          }          return new TEventSerializedData; -    } - +    }  +       TIntrusivePtr<TEventSerializedData> IEventHandle::GetChainBuffer() {          if (Buffer)              return Buffer; @@ -34,5 +34,5 @@ namespace NActors {              return Buffer;          }          return new TEventSerializedData; -    } -} +    }  +}  diff --git a/library/cpp/actors/core/event.h b/library/cpp/actors/core/event.h index 6ff02aaf943..081549071da 100644 --- a/library/cpp/actors/core/event.h +++ b/library/cpp/actors/core/event.h @@ -3,7 +3,7 @@  #include "defs.h"  #include "actorid.h"  #include "callstack.h" -#include "event_load.h" +#include "event_load.h"   #include <library/cpp/actors/wilson/wilson_trace.h> @@ -17,13 +17,13 @@ namespace NActors {      public:          virtual bool SerializeToArcadiaStream(TChunkSerializer*) const = 0;      }; - +       class IEventBase          : TNonCopyable,            public ISerializerToStream {      public:          // actual typing is performed by IEventHandle - +           virtual ~IEventBase() {          } @@ -87,7 +87,7 @@ namespace NActors {              Buffer.Reset();              return x;          } - +           enum EFlags {              FlagTrackDelivery = 1 << 0,              FlagForwardOnNondelivery = 1 << 1, @@ -236,7 +236,7 @@ namespace NActors {              , RewriteType(Type)          {          } - +           TIntrusivePtr<TEventSerializedData> GetChainBuffer();          
TIntrusivePtr<TEventSerializedData> ReleaseChainBuffer(); @@ -248,15 +248,15 @@ namespace NActors {              } else {                  return 0;              } -        } +        }           bool HasBuffer() const {              return bool(Buffer); -        } +        }           bool HasEvent() const {              return bool(Event); -        } +        }           IEventBase* GetBase() {              if (!Event) { @@ -326,7 +326,7 @@ namespace NActors {      }                                                                   \      bool IsSerializable() const override {                              \          return false;                                                   \ -    } +    }   #define DEFINE_SIMPLE_NONLOCAL_EVENT(eventType, header)                 \      TString ToStringHeader() const override {                           \ @@ -340,5 +340,5 @@ namespace NActors {      }                                                                   \      bool IsSerializable() const override {                              \          return true;                                                    \ -    } +    }   } diff --git a/library/cpp/actors/core/event_load.h b/library/cpp/actors/core/event_load.h index 0dab1dd374c..da2adc28ea8 100644 --- a/library/cpp/actors/core/event_load.h +++ b/library/cpp/actors/core/event_load.h @@ -1,24 +1,24 @@ -#pragma once +#pragma once   #include <util/stream/walk.h> -#include <util/system/types.h> +#include <util/system/types.h>   #include <util/generic/string.h>  #include <library/cpp/actors/util/rope.h>  #include <library/cpp/actors/wilson/wilson_trace.h> - -namespace NActors { +  +namespace NActors {       class IEventHandle; - +       struct TConstIoVec {          const void* Data;          size_t Size;      }; - +       struct TIoVec {          void* Data;          size_t Size;      }; - +       class TEventSerializedData         : public TThrRefBase      { @@ -70,7 +70,7 @@ namespace NActors {              }           
   return result;          } - +           TRope EraseBack(size_t count) {              Y_VERIFY(count <= Rope.GetSize());              TRope::TIterator iter = Rope.End(); @@ -81,25 +81,25 @@ namespace NActors {          void Append(TRope&& from) {              Rope.Insert(Rope.End(), std::move(from));          } - +           void Append(TString buffer) {              if (buffer) {                  Rope.Insert(Rope.End(), TRope(std::move(buffer)));              }          }      }; -} - +}  +   class TChainBufWalk : public IWalkInput {      TIntrusivePtr<NActors::TEventSerializedData> Buffer;      TRope::TConstIterator Iter; - +   public:      TChainBufWalk(TIntrusivePtr<NActors::TEventSerializedData> buffer)          : Buffer(std::move(buffer))          , Iter(Buffer->GetBeginIter())      {} - +   private:      size_t DoUnboundedNext(const void **ptr) override {          const size_t size = Iter.ContiguousSize(); @@ -108,5 +108,5 @@ private:              Iter.AdvanceToNextContiguousBlock();          }          return size; -    } +    }   }; diff --git a/library/cpp/actors/core/event_local.h b/library/cpp/actors/core/event_local.h index 2845aa94dd9..2a4ff9fa55e 100644 --- a/library/cpp/actors/core/event_local.h +++ b/library/cpp/actors/core/event_local.h @@ -2,7 +2,7 @@  #include "event.h"  #include "scheduler_cookie.h" -#include "event_load.h" +#include "event_load.h"   #include <util/system/type_name.h>  namespace NActors { diff --git a/library/cpp/actors/core/event_pb.cpp b/library/cpp/actors/core/event_pb.cpp index 018ff9ac34e..bae0a0a64b3 100644 --- a/library/cpp/actors/core/event_pb.cpp +++ b/library/cpp/actors/core/event_pb.cpp @@ -1,6 +1,6 @@ -#include "event_pb.h" - -namespace NActors { +#include "event_pb.h"  +  +namespace NActors {       bool TRopeStream::Next(const void** data, int* size) {          *data = Iter.ContiguousData();          *size = Iter.ContiguousSize(); @@ -13,13 +13,13 @@ namespace NActors {          TotalByteCount += *size;          
return *size != 0;      } - +       void TRopeStream::BackUp(int count) {          Y_VERIFY(count <= TotalByteCount);          Iter -= count;          TotalByteCount -= count;      } - +       bool TRopeStream::Skip(int count) {          if (static_cast<size_t>(TotalByteCount + count) > Size) {              count = Size - TotalByteCount; @@ -27,20 +27,20 @@ namespace NActors {          Iter += count;          TotalByteCount += count;          return static_cast<size_t>(TotalByteCount) != Size; -    } - +    }  +       TCoroutineChunkSerializer::TCoroutineChunkSerializer()          : TotalSerializedDataSize(0)          , Stack(64 * 1024)          , SelfClosure{this, TArrayRef(Stack.Begin(), Stack.End())}          , InnerContext(SelfClosure)      {} - +       TCoroutineChunkSerializer::~TCoroutineChunkSerializer() {          CancelFlag = true;          Resume();          Y_VERIFY(Finished); -    } +    }       bool TCoroutineChunkSerializer::AllowsAliasing() const {          return true; @@ -85,10 +85,10 @@ namespace NActors {              } else {                  InnerContext.SwitchTo(BufFeedContext);              } -        } +        }           return true; -    } - +    }  +       bool TCoroutineChunkSerializer::Next(void** data, int* size) {          if (CancelFlag || AbortFlag) {              return false; @@ -122,15 +122,15 @@ namespace NActors {          BufferPtr -= count;          SizeRemain += count;          TotalSerializedDataSize -= count; -    } - +    }  +       void TCoroutineChunkSerializer::Resume() {          TContMachineContext feedContext;          BufFeedContext = &feedContext;          feedContext.SwitchTo(&InnerContext);          BufFeedContext = nullptr; -    } - +    }  +       bool TCoroutineChunkSerializer::WriteRope(const TRope *rope) {          for (auto iter = rope->Begin(); iter.Valid(); iter.AdvanceToNextContiguousBlock()) {              if (!WriteAliasedRaw(iter.ContiguousData(), iter.ContiguousSize())) { @@ -156,14 +156,14 @@ 
namespace NActors {          return {Chunks, Chunks + NumChunks};      } - +       void TCoroutineChunkSerializer::SetSerializingEvent(const IEventBase *event) {          Y_VERIFY(Event == nullptr);          Event = event;          TotalSerializedDataSize = 0;          AbortFlag = false;      } - +       void TCoroutineChunkSerializer::Abort() {          Y_VERIFY(Event);          AbortFlag = true; @@ -181,8 +181,8 @@ namespace NActors {          }          Finished = true;          InnerContext.SwitchTo(BufFeedContext); -    } - +    }  +       bool TAllocChunkSerializer::Next(void** pdata, int* psize) {          if (Backup) {              // we have some data in backup rope -- move the first chunk from the backup rope to the buffer and return @@ -200,12 +200,12 @@ namespace NActors {              Buffers->Append(TRope(std::move(item)));          }          return true; -    } - +    }  +       void TAllocChunkSerializer::BackUp(int count) {          Backup.Insert(Backup.Begin(), Buffers->EraseBack(count));      } - +       bool TAllocChunkSerializer::WriteAliasedRaw(const void*, int) {          Y_VERIFY(false);          return false; diff --git a/library/cpp/actors/core/event_pb.h b/library/cpp/actors/core/event_pb.h index d7546b901a0..1c69d7e9bf9 100644 --- a/library/cpp/actors/core/event_pb.h +++ b/library/cpp/actors/core/event_pb.h @@ -1,15 +1,15 @@  #pragma once  #include "event.h" -#include "event_load.h" - +#include "event_load.h"  +   #include <google/protobuf/io/zero_copy_stream.h>  #include <google/protobuf/arena.h>  #include <library/cpp/actors/protos/actors.pb.h> -#include <util/generic/deque.h> -#include <util/system/context.h> -#include <util/system/filemap.h> -#include <array> +#include <util/generic/deque.h>  +#include <util/system/context.h>  +#include <util/system/filemap.h>  +#include <array>   namespace NActors { @@ -29,11 +29,11 @@ namespace NActors {          int64_t ByteCount() const override {              return TotalByteCount;          } - 
+       private:          int64_t TotalByteCount = 0;      }; - +       class TChunkSerializer : public NProtoBuf::io::ZeroCopyOutputStream {      public:          TChunkSerializer() = default; @@ -42,7 +42,7 @@ namespace NActors {          virtual bool WriteRope(const TRope *rope) = 0;          virtual bool WriteString(const TString *s) = 0;      }; - +       class TAllocChunkSerializer final : public TChunkSerializer {      public:          bool Next(void** data, int* size) override; @@ -51,7 +51,7 @@ namespace NActors {              return Buffers->GetSize();          }          bool WriteAliasedRaw(const void* data, int size) override; - +           // WARNING: these methods require owner to retain ownership and immutability of passed objects          bool WriteRope(const TRope *rope) override;          bool WriteString(const TString *s) override; @@ -62,19 +62,19 @@ namespace NActors {              }              return std::move(Buffers);          } - +       protected:          TIntrusivePtr<TEventSerializedData> Buffers = new TEventSerializedData;          TRope Backup;      }; - +       class TCoroutineChunkSerializer final : public TChunkSerializer, protected ITrampoLine {      public:          using TChunk = std::pair<const char*, size_t>;          TCoroutineChunkSerializer();          ~TCoroutineChunkSerializer(); - +           void SetSerializingEvent(const IEventBase *event);          void Abort();          std::pair<TChunk*, TChunk*> FeedBuf(void* data, size_t size); @@ -87,7 +87,7 @@ namespace NActors {          const IEventBase *GetCurrentEvent() const {              return Event;          } - +           bool Next(void** data, int* size) override;          void BackUp(int count) override;          int64_t ByteCount() const override { @@ -95,7 +95,7 @@ namespace NActors {          }          bool WriteAliasedRaw(const void* data, int size) override;          bool AllowsAliasing() const override; - +           bool WriteRope(const TRope *rope) 
override;          bool WriteString(const TString *s) override; @@ -103,7 +103,7 @@ namespace NActors {          void DoRun() override;          void Resume();          bool Produce(const void *data, size_t size); - +           i64 TotalSerializedDataSize;          TMappedAllocation Stack;          TContClosure SelfClosure; @@ -120,7 +120,7 @@ namespace NActors {          bool SerializationSuccess;          bool Finished = false;      }; - +   #ifdef ACTORLIB_HUGE_PB_SIZE      static const size_t EventMaxByteSize = 140 << 20; // (140MB)  #else @@ -137,9 +137,9 @@ namespace NActors {      public:          using ProtoRecordType = TRecord; - +           TEventPBBase() = default; - +           explicit TEventPBBase(const TRecord& rec)          {              Record = rec; @@ -153,7 +153,7 @@ namespace NActors {          TString ToStringHeader() const override {              return Record.GetTypeName();          } - +           TString ToString() const override {              return Record.ShortDebugString();          } @@ -274,7 +274,7 @@ namespace NActors {              ev->CachedByteSize = input->GetSize();              return ev.Release();          } - +           size_t GetCachedByteSize() const {              if (CachedByteSize == 0) {                  CachedByteSize = CalculateSerializedSize(); diff --git a/library/cpp/actors/core/events.h b/library/cpp/actors/core/events.h index 702cf50fadf..88103e888c1 100644 --- a/library/cpp/actors/core/events.h +++ b/library/cpp/actors/core/events.h @@ -1,11 +1,11 @@  #pragma once  #include "event.h" -#include "event_pb.h" +#include "event_pb.h"   #include <library/cpp/actors/protos/actors.pb.h>  #include <util/system/unaligned_mem.h> - +   namespace NActors {      struct TEvents {          enum EEventSpace { @@ -213,7 +213,7 @@ namespace NActors {          using TEvPoisonPill = TEvPoison; // Legacy name, deprecated          using TEvActorDied = TEvGone; -    }; +    };   }  template <> diff --git 
a/library/cpp/actors/core/executelater.h b/library/cpp/actors/core/executelater.h index e7a13c10053..53da5923733 100644 --- a/library/cpp/actors/core/executelater.h +++ b/library/cpp/actors/core/executelater.h @@ -1,10 +1,10 @@ -#pragma once - -#include "actor_bootstrapped.h" - -#include <utility> - -namespace NActors { +#pragma once  +  +#include "actor_bootstrapped.h"  +  +#include <utility>  +  +namespace NActors {       template <typename TCallback>      class TExecuteLater: public TActorBootstrapped<TExecuteLater<TCallback>> {      public: @@ -13,10 +13,10 @@ namespace NActors {          }          TExecuteLater( -            TCallback&& callback, -            IActor::EActivityType activityType, -            ui32 channel = 0, -            ui64 cookie = 0, +            TCallback&& callback,  +            IActor::EActivityType activityType,  +            ui32 channel = 0,  +            ui64 cookie = 0,               const TActorId& reportCompletionTo = TActorId(),              const TActorId& reportExceptionTo = TActorId()) noexcept              : Callback(std::move(callback)) @@ -27,16 +27,16 @@ namespace NActors {          {              this->SetActivityType(activityType);          } - +           void Bootstrap(const TActorContext& ctx) noexcept {              try {                  {                      /* RAII, Callback should be destroyed right before sending -                   TEvCallbackCompletion */ - +                   TEvCallbackCompletion */  +                       auto local = std::move(Callback);                      using T = decltype(local); - +                       if constexpr (std::is_invocable_v<T, const TActorContext&>) {                          local(ctx);                      } else { @@ -56,11 +56,11 @@ namespace NActors {                               new TEvents::TEvCallbackException(ctx.SelfID, msg),                               Channel, Cookie);                  } -            } - +            }  +               
this->Die(ctx); -        } - +        }  +       private:          TCallback Callback;          const ui32 Channel; @@ -68,13 +68,13 @@ namespace NActors {          const TActorId ReportCompletionTo;          const TActorId ReportExceptionTo;      }; - +       template <typename T>      IActor* CreateExecuteLaterActor( -        T&& func, -        IActor::EActivityType activityType, -        ui32 channel = 0, -        ui64 cookie = 0, +        T&& func,  +        IActor::EActivityType activityType,  +        ui32 channel = 0,  +        ui64 cookie = 0,           const TActorId& reportCompletionTo = TActorId(),          const TActorId& reportExceptionTo = TActorId()) noexcept {          return new TExecuteLater<T>(std::forward<T>(func), @@ -84,4 +84,4 @@ namespace NActors {                                      reportCompletionTo,                                      reportExceptionTo);      } -} +}  diff --git a/library/cpp/actors/core/executor_pool_basic.cpp b/library/cpp/actors/core/executor_pool_basic.cpp index 4dce16939ae..3123e9b1a61 100644 --- a/library/cpp/actors/core/executor_pool_basic.cpp +++ b/library/cpp/actors/core/executor_pool_basic.cpp @@ -4,23 +4,23 @@  #include <library/cpp/actors/util/affinity.h>  #include <library/cpp/actors/util/datetime.h> -#ifdef _linux_ +#ifdef _linux_   #include <pthread.h> -#endif - +#endif  +   namespace NActors {      LWTRACE_USING(ACTORLIB_PROVIDER);      constexpr TDuration TBasicExecutorPool::DEFAULT_TIME_PER_MAILBOX;      TBasicExecutorPool::TBasicExecutorPool( -        ui32 poolId, -        ui32 threads, -        ui64 spinThreshold, +        ui32 poolId,  +        ui32 threads,  +        ui64 spinThreshold,           const TString& poolName,          TAffinity* affinity, -        TDuration timePerMailbox, -        ui32 eventsPerMailbox, +        TDuration timePerMailbox,  +        ui32 eventsPerMailbox,           int realtimePriority,          ui32 maxActivityType)          : TExecutorPoolBase(poolId, threads, 
affinity, maxActivityType) @@ -330,10 +330,10 @@ namespace NActors {              if (pthread_setschedparam(threadSelf, SCHED_FIFO, &param)) {                  Y_FAIL("Cannot set realtime priority");              } -        } -#else +        }  +#else           Y_UNUSED(RealtimePriority); -#endif +#endif       }      ui32 TBasicExecutorPool::GetThreadCount() const { diff --git a/library/cpp/actors/core/executor_pool_basic.h b/library/cpp/actors/core/executor_pool_basic.h index 023190f7fe3..65ceed26696 100644 --- a/library/cpp/actors/core/executor_pool_basic.h +++ b/library/cpp/actors/core/executor_pool_basic.h @@ -62,7 +62,7 @@ namespace NActors {          TAtomic ThreadUtilization;          TAtomic MaxUtilizationCounter;          TAtomic MaxUtilizationAccumulator; - +           TAtomic ThreadCount;          TMutex ChangeThreadsLock; @@ -81,7 +81,7 @@ namespace NActors {                             ui32 maxActivityType = 1);          explicit TBasicExecutorPool(const TBasicExecutorPoolConfig& cfg);          ~TBasicExecutorPool(); - +           ui32 GetReadyActivation(TWorkerContext& wctx, ui64 revolvingReadCounter) override;          void Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) override; diff --git a/library/cpp/actors/core/executor_pool_united.cpp b/library/cpp/actors/core/executor_pool_united.cpp index dac6245635d..e5968609e7f 100644 --- a/library/cpp/actors/core/executor_pool_united.cpp +++ b/library/cpp/actors/core/executor_pool_united.cpp @@ -14,7 +14,7 @@  #include <util/system/datetime.h>  #include <util/system/hp_timer.h> - +   #include <algorithm>  namespace NActors { @@ -1315,7 +1315,7 @@ namespace NActors {              if (Y_UNLIKELY(result == CpuStopped) || TryAcquireToken(result)) {                  break; // token acquired (or stop)              } -        } +        }           wctx.AddElapsedCycles(IActor::ACTOR_SYSTEM, timeTracker.Elapsed());          return result; diff --git 
a/library/cpp/actors/core/executor_pool_united.h b/library/cpp/actors/core/executor_pool_united.h index a090ba24665..01be95b778b 100644 --- a/library/cpp/actors/core/executor_pool_united.h +++ b/library/cpp/actors/core/executor_pool_united.h @@ -63,7 +63,7 @@ namespace NActors {          // Sets executor for specified pool          void SetupPool(TPoolId pool, IExecutorPool* executorPool, TMailboxTable* mailboxTable); - +           // Add activation of newly scheduled mailbox and wake cpu to execute it if required          void PushActivation(TPoolId pool, ui32 activation, ui64 revolvingCounter); @@ -72,7 +72,7 @@ namespace NActors {          // Try to wake idle cpu waiting for tokens on specified pool          void TryWake(TPoolId pool); - +           // Get activation from pool; requires pool's token          void BeginExecution(TPoolId pool, ui32& activation, ui64 revolvingCounter); diff --git a/library/cpp/actors/core/executor_thread.cpp b/library/cpp/actors/core/executor_thread.cpp index 446b651efd2..ac97689f311 100644 --- a/library/cpp/actors/core/executor_thread.cpp +++ b/library/cpp/actors/core/executor_thread.cpp @@ -303,7 +303,7 @@ namespace NActors {          ExecutorPool->SetRealTimeMode();          TAffinityGuard affinity(ExecutorPool->Affinity()); - +           NHPTimer::STime hpnow = GetCycleCountFast();          NHPTimer::STime hpprev = hpnow;          ui64 execCount = 0; diff --git a/library/cpp/actors/core/executor_thread.h b/library/cpp/actors/core/executor_thread.h index 9d3c573f0d6..66b97bd3513 100644 --- a/library/cpp/actors/core/executor_thread.h +++ b/library/cpp/actors/core/executor_thread.h @@ -45,7 +45,7 @@ namespace NActors {          void UnregisterActor(TMailboxHeader* mailbox, ui64 localActorId);          void DropUnregistered();          const std::vector<THolder<IActor>>& GetUnregistered() const { return DyingActors; } - +           void Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie = nullptr);      
    void Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie = nullptr);          void Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie = nullptr); diff --git a/library/cpp/actors/core/log.cpp b/library/cpp/actors/core/log.cpp index 5f63b5af580..bfac7d30e43 100644 --- a/library/cpp/actors/core/log.cpp +++ b/library/cpp/actors/core/log.cpp @@ -195,7 +195,7 @@ namespace NActors {          , Metrics(std::make_unique<TLoggerMetrics>(metrics))      {      } - +       TLoggerActor::TLoggerActor(TIntrusivePtr<NLog::TSettings> settings,                                 std::shared_ptr<TLogBackend> logBackend,                                 std::shared_ptr<NMonitoring::TMetricRegistry> metrics) @@ -260,8 +260,8 @@ namespace NActors {                  break;              default:                  break; -        } - +        }  +       }      void TLoggerActor::HandleLogEvent(NLog::TEvLog::TPtr& ev, const NActors::TActorContext& ctx) { diff --git a/library/cpp/actors/core/log.h b/library/cpp/actors/core/log.h index c11a7cf3c19..514ff51c14d 100644 --- a/library/cpp/actors/core/log.h +++ b/library/cpp/actors/core/log.h @@ -42,7 +42,7 @@                  actorCtxOrSystem, priority, component, __VA_ARGS__);                                                           \          }                                                                                                                      \      } while (0) /**/ - +   #define LOG_LOG_S_SAMPLED_BY(actorCtxOrSystem, priority, component, sampleBy, stream)  \      LOG_LOG_SAMPLED_BY(actorCtxOrSystem, priority, component, sampleBy, "%s", [&]() { \          TStringBuilder logStringBuilder;                                               \ @@ -304,7 +304,7 @@ namespace NActors {      /////////////////////////////////////////////////////////////////////      //  Logging adaptors for memory log and logging into filesystem      
///////////////////////////////////////////////////////////////////// - +       namespace NDetail {          inline void Y_PRINTF_FORMAT(2, 3) PrintfV(TString& dst, const char* format, ...) {              va_list params; @@ -318,7 +318,7 @@ namespace NActors {          }      } // namespace NDetail -    template <typename TCtx> +    template <typename TCtx>       inline void DeliverLogMessage(TCtx& ctx, NLog::EPriority mPriority, NLog::EComponent mComponent, TString &&str)      {          const NLog::TSettings *mSettings = ctx.LoggerSettings(); @@ -327,14 +327,14 @@ namespace NActors {      }      template <typename TCtx, typename... TArgs> -    inline void MemLogAdapter( +    inline void MemLogAdapter(           TCtx& actorCtxOrSystem,          NLog::EPriority mPriority,          NLog::EComponent mComponent,          const char* format, TArgs&&... params) {          TString Formatted; - - +  +           if constexpr (sizeof... (params) > 0) {              NDetail::PrintfV(Formatted, format, std::forward<TArgs>(params)...);          } else { @@ -343,9 +343,9 @@ namespace NActors {          MemLogWrite(Formatted.data(), Formatted.size(), true);          DeliverLogMessage(actorCtxOrSystem, mPriority, mComponent, std::move(Formatted)); -    } - -    template <typename TCtx> +    }  +  +    template <typename TCtx>       Y_WRAPPER inline void MemLogAdapter(          TCtx& actorCtxOrSystem,          NLog::EPriority mPriority, @@ -355,7 +355,7 @@ namespace NActors {          MemLogWrite(str.data(), str.size(), true);          DeliverLogMessage(actorCtxOrSystem, mPriority, mComponent, TString(str));      } - +       template <typename TCtx>      Y_WRAPPER inline void MemLogAdapter(          TCtx& actorCtxOrSystem, @@ -365,5 +365,5 @@ namespace NActors {          MemLogWrite(str.data(), str.size(), true);          DeliverLogMessage(actorCtxOrSystem, mPriority, mComponent, std::move(str)); -    } +    }   } diff --git a/library/cpp/actors/core/mailbox.cpp 
b/library/cpp/actors/core/mailbox.cpp index d84b4f9e466..ac598eff863 100644 --- a/library/cpp/actors/core/mailbox.cpp +++ b/library/cpp/actors/core/mailbox.cpp @@ -214,49 +214,49 @@ namespace NActors {                      return true;                  case TMailboxType::HTSwap: {                      THTSwapMailbox* const mailbox = THTSwapMailbox::Get(lineHint, x); -#if (!defined(_tsan_enabled_)) +#if (!defined(_tsan_enabled_))                       Y_VERIFY_DEBUG(mailbox->Type == (ui32)x->MailboxType); -#endif +#endif                       mailbox->Queue.Push(ev.Release());                      if (mailbox->MarkForSchedule()) {                          RelaxedStore<NHPTimer::STime>(&mailbox->ScheduleMoment, GetCycleCountFast());                          executorPool->ScheduleActivation(hint);                      } -                } +                }                       return true;                  case TMailboxType::ReadAsFilled: {                      if (lineHint > TReadAsFilledMailbox::MaxMailboxesInLine())                          return false; - +                       TReadAsFilledMailbox* const mailbox = TReadAsFilledMailbox::Get(lineHint, x); -#if (!defined(_tsan_enabled_)) +#if (!defined(_tsan_enabled_))                       Y_VERIFY_DEBUG(mailbox->Type == (ui32)x->MailboxType); -#endif +#endif                       mailbox->Queue.Push(ev.Release());                      if (mailbox->MarkForSchedule()) {                          RelaxedStore<NHPTimer::STime>(&mailbox->ScheduleMoment, GetCycleCountFast());                          executorPool->ScheduleActivation(hint);                      } -                } +                }                       return true;                  case TMailboxType::TinyReadAsFilled: {                      if (lineHint > TTinyReadAsFilledMailbox::MaxMailboxesInLine())                          return false; - +                       TTinyReadAsFilledMailbox* const mailbox = TTinyReadAsFilledMailbox::Get(lineHint, 
x); -#if (!defined(_tsan_enabled_)) +#if (!defined(_tsan_enabled_))                       Y_VERIFY_DEBUG(mailbox->Type == (ui32)x->MailboxType); -#endif +#endif                       mailbox->Queue.Push(ev.Release());                      if (mailbox->MarkForSchedule()) {                          RelaxedStore<NHPTimer::STime>(&mailbox->ScheduleMoment, GetCycleCountFast());                          executorPool->ScheduleActivation(hint);                      } -                } +                }                       return true;                  default:                      Y_FAIL("unknown mailbox type"); -            } +            }           }          return false; diff --git a/library/cpp/actors/core/mailbox.h b/library/cpp/actors/core/mailbox.h index 0bd9c4d314e..8a2c0d06083 100644 --- a/library/cpp/actors/core/mailbox.h +++ b/library/cpp/actors/core/mailbox.h @@ -10,7 +10,7 @@  #include <library/cpp/threading/queue/mpsc_read_as_filled.h>  #include <util/generic/hash.h>  #include <util/system/hp_timer.h> -#include <util/generic/ptr.h> +#include <util/generic/ptr.h>   // TODO: clean all broken arcadia atomic stuff and replace with intrinsics  namespace NActors { @@ -389,52 +389,52 @@ namespace NActors {              constexpr static ui32 AlignedSize() {                  return ((sizeof(TRevolvingMailbox) + 63) / 64) * 64;              } - +               std::pair<ui32, ui32> CountRevolvingMailboxEvents(ui64 localActorId, ui32 maxTraverse);              bool CleanupEvents();          }; - +           static_assert(sizeof(TRevolvingMailbox) == 128, "expect sizeof(TRevolvingMailbox) == 128"); - +           struct THTSwapMailbox: public TMailboxHeader {              using TQueueType = NThreading::THTSwapQueue<IEventHandle*>; - +               TQueueType Queue;              NHPTimer::STime ScheduleMoment;              char Padding_[16]; - +               THTSwapMailbox()                  : TMailboxHeader(TMailboxType::HTSwap)                  , 
ScheduleMoment(0)              {              } - +               ~THTSwapMailbox() {                  CleanupEvents();              } - +               IEventHandle* Pop() {                  return Queue.Pop();              } - +               IEventHandle* Head() {                  return Queue.Peek();              } - +               static THTSwapMailbox* Get(ui32 hint, void* line) {                  return (THTSwapMailbox*)((ui8*)line + 64 + (hint - 1) * 64);              } - +               constexpr static ui64 MaxMailboxesInLine() {                  return (LineSize - 64) / AlignedSize();              } - +               static const TMailboxType::EType MailboxType = TMailboxType::HTSwap; - +               constexpr static ui32 AlignedSize() {                  return ((sizeof(THTSwapMailbox) + 63) / 64) * 64;              } - +               bool CleanupEvents() {                  const bool done = (Queue.Peek() == nullptr);                  while (IEventHandle* ev = Queue.Pop()) @@ -442,50 +442,50 @@ namespace NActors {                  return done;              }          }; - +           static_assert(sizeof(THTSwapMailbox) == 64,                        "expect sizeof(THTSwapMailbox) == 64"); - +           struct TReadAsFilledMailbox: public TMailboxHeader {              using TQueueType = NThreading::TReadAsFilledQueue<IEventHandle>; - +               TQueueType Queue;              NHPTimer::STime ScheduleMoment;              char Padding_[8]; - +               TReadAsFilledMailbox()                  : TMailboxHeader(TMailboxType::ReadAsFilled)                  , ScheduleMoment(0)              {              } - +               ~TReadAsFilledMailbox() {                  CleanupEvents();              } - +               IEventHandle* Pop() {                  return Queue.Pop();              } - +               IEventHandle* Head() {                  return Queue.Peek();              } - +               static TReadAsFilledMailbox* Get(ui32 hint, void* 
line) {                  return (TReadAsFilledMailbox*)((ui8*)line + 64 + (hint - 1) * 192);              } - +               constexpr static ui64 MaxMailboxesInLine() {                  return (LineSize - 64) / AlignedSize();              } - +               static const TMailboxType::EType MailboxType =                  TMailboxType::ReadAsFilled; - +               constexpr static ui32 AlignedSize() {                  return ((sizeof(TReadAsFilledMailbox) + 63) / 64) * 64;              } - +               bool CleanupEvents() {                  const bool done = (Queue.Peek() == nullptr);                  while (IEventHandle* ev = Queue.Pop()) @@ -493,52 +493,52 @@ namespace NActors {                  return done;              }          }; - +           static_assert(sizeof(TReadAsFilledMailbox) == 192,                        "expect sizeof(TReadAsFilledMailbox) == 192"); - +           struct TTinyReadAsFilledMailbox: public TMailboxHeader {              using TQueueType = NThreading::TReadAsFilledQueue<                  IEventHandle,                  NThreading::TRaFQueueBunchSize<4>>; - +               TQueueType Queue;              NHPTimer::STime ScheduleMoment;              char Padding_[8]; - +               TTinyReadAsFilledMailbox()                  : TMailboxHeader(TMailboxType::TinyReadAsFilled)                  , ScheduleMoment(0)              {              } - +               ~TTinyReadAsFilledMailbox() {                  CleanupEvents();              } - +               IEventHandle* Pop() {                  return Queue.Pop();              } - +               IEventHandle* Head() {                  return Queue.Peek();              } - +               static TTinyReadAsFilledMailbox* Get(ui32 hint, void* line) {                  return (TTinyReadAsFilledMailbox*)((ui8*)line + 64 + (hint - 1) * 192);              } - +               constexpr static ui64 MaxMailboxesInLine() {                  return (LineSize - 64) / AlignedSize();              
} - +               static const TMailboxType::EType MailboxType =                  TMailboxType::TinyReadAsFilled; - +               constexpr static ui32 AlignedSize() {                  return ((sizeof(TTinyReadAsFilledMailbox) + 63) / 64) * 64;              } - +               bool CleanupEvents() {                  const bool done = (Queue.Peek() == nullptr);                  while (IEventHandle* ev = Queue.Pop()) @@ -546,8 +546,8 @@ namespace NActors {                  return done;              }          }; - +           static_assert(sizeof(TTinyReadAsFilledMailbox) == 192,                        "expect sizeof(TTinyReadAsFilledMailbox) == 192"); -    }; +    };   } diff --git a/library/cpp/actors/core/mon.h b/library/cpp/actors/core/mon.h index c450f2338eb..3ebf6a0bed6 100644 --- a/library/cpp/actors/core/mon.h +++ b/library/cpp/actors/core/mon.h @@ -123,7 +123,7 @@ namespace NActors {                  return true;              } -            static IEventBase* Load(TEventSerializedData* bufs) { +            static IEventBase* Load(TEventSerializedData* bufs) {                   return new TEvRemoteHttpInfo(bufs->GetString());              } @@ -160,7 +160,7 @@ namespace NActors {                  return true;              } -            static IEventBase* Load(TEventSerializedData* bufs) { +            static IEventBase* Load(TEventSerializedData* bufs) {                   return new TEvRemoteHttpInfoRes(bufs->GetString());              }          }; @@ -192,7 +192,7 @@ namespace NActors {                  return true;              } -            static IEventBase* Load(TEventSerializedData* bufs) { +            static IEventBase* Load(TEventSerializedData* bufs) {                   return new TEvRemoteJsonInfoRes(bufs->GetString());              }          }; diff --git a/library/cpp/actors/core/mon_stats.h b/library/cpp/actors/core/mon_stats.h index d55552af0cb..f1d66664b6c 100644 --- a/library/cpp/actors/core/mon_stats.h +++ 
b/library/cpp/actors/core/mon_stats.h @@ -13,17 +13,17 @@ namespace NActors {          inline void Add(ui64 val, ui64 inc = 1) {              size_t ind = 0; -#if defined(__clang__) && __clang_major__ == 3 && __clang_minor__ == 7 +#if defined(__clang__) && __clang_major__ == 3 && __clang_minor__ == 7               asm volatile("" ::                               : "memory"); -#endif +#endif               if (val > 1) {                  ind = GetValueBitCount(val - 1);              } -#if defined(__clang__) && __clang_major__ == 3 && __clang_minor__ == 7 +#if defined(__clang__) && __clang_major__ == 3 && __clang_minor__ == 7               asm volatile("" ::                               : "memory"); -#endif +#endif               RelaxedStore(&TotalSamples, RelaxedLoad(&TotalSamples) + inc);              RelaxedStore(&Buckets[ind], RelaxedLoad(&Buckets[ind]) + inc);          } diff --git a/library/cpp/actors/core/ya.make b/library/cpp/actors/core/ya.make index 880a9d00dba..22155dbeecc 100644 --- a/library/cpp/actors/core/ya.make +++ b/library/cpp/actors/core/ya.make @@ -32,8 +32,8 @@ SRCS(      ask.h      balancer.h      balancer.cpp -    buffer.cpp -    buffer.h +    buffer.cpp  +    buffer.h       callstack.cpp      callstack.h      config.h @@ -45,7 +45,7 @@ SRCS(      event.h      event_load.h      event_local.h -    event_pb.cpp +    event_pb.cpp       event_pb.h      events.h      events_undelivered.cpp diff --git a/library/cpp/actors/dnscachelib/dnscache.cpp b/library/cpp/actors/dnscachelib/dnscache.cpp index 649339ddb22..580956c92ed 100644 --- a/library/cpp/actors/dnscachelib/dnscache.cpp +++ b/library/cpp/actors/dnscachelib/dnscache.cpp @@ -155,19 +155,19 @@ void TDnsCache::GetStats(ui64& a_cache_hits, ui64& a_cache_misses,  }  bool TDnsCache::THost::IsStale(int family, const TDnsCache* ctx) const noexcept { -    time_t resolved = family == AF_INET ? ResolvedV4 : ResolvedV6; -    time_t notfound = family == AF_INET ? 
NotFoundV4 : NotFoundV6; - -    if (TTimeKeeper::GetTime() - resolved < ctx->EntryLifetime) -        return false; - -    if (TTimeKeeper::GetTime() - notfound < ctx->NegativeLifetime) -        return false; - -    return true; -} - -const TDnsCache::THost& +    time_t resolved = family == AF_INET ? ResolvedV4 : ResolvedV6;  +    time_t notfound = family == AF_INET ? NotFoundV4 : NotFoundV6;  +  +    if (TTimeKeeper::GetTime() - resolved < ctx->EntryLifetime)  +        return false;  +  +    if (TTimeKeeper::GetTime() - notfound < ctx->NegativeLifetime)  +        return false;  +  +    return true;  +}  +  +const TDnsCache::THost&   TDnsCache::Resolve(const TString& hostname, int family, bool cacheOnly) {      if (!ValidateHName(hostname)) {          LWPROBE(ResolveNullHost, hostname, family); @@ -182,7 +182,7 @@ TDnsCache::Resolve(const TString& hostname, int family, bool cacheOnly) {          TGuard<TMutex> lock(CacheMtx);          p = HostCache.find(hostname);          if (p != HostCache.end()) { -            if (!p->second.IsStale(family, this)) { +            if (!p->second.IsStale(family, this)) {                   /* Recently resolved, just return cached value */                  ACacheHits += 1;                  THost& host = p->second; @@ -199,9 +199,9 @@ TDnsCache::Resolve(const TString& hostname, int family, bool cacheOnly) {          ACacheMisses += 1;      } -    if (cacheOnly) -        return NullHost; - +    if (cacheOnly)  +        return NullHost;  +       TAtomic& inprogress = (family == AF_INET ? 
p->second.InProgressV4 : p->second.InProgressV6);      { @@ -219,7 +219,7 @@ TDnsCache::Resolve(const TString& hostname, int family, bool cacheOnly) {              ctx->Hostname = hostname;              ctx->Family = family; -            AtomicSet(inprogress, 1); +            AtomicSet(inprogress, 1);               ares_gethostbyname(chan, hostname.c_str(), family,                                 &TDnsCache::GHBNCallback, ctx);          } @@ -269,7 +269,7 @@ const TDnsCache::TAddr& TDnsCache::ResolveAddr(const in6_addr& addr, int family)              ctx->Owner = this;              ctx->Addr = addr; -            AtomicSet(p->second.InProgress, 1); +            AtomicSet(p->second.InProgress, 1);               ares_gethostbyaddr(chan, &addr,                                 family == AF_INET ? sizeof(in_addr) : sizeof(in6_addr),                                 family, &TDnsCache::GHBACallback, ctx); @@ -284,7 +284,7 @@ const TDnsCache::TAddr& TDnsCache::ResolveAddr(const in6_addr& addr, int family)  void TDnsCache::WaitTask(TAtomic& flag) {      const TInstant start = TInstant(TTimeKeeper::GetTimeval()); -    while (AtomicGet(flag)) { +    while (AtomicGet(flag)) {           ares_channel chan = static_cast<ares_channel>(Channel);          struct pollfd pfd[ARES_GETSOCK_MAXNUM]; @@ -380,7 +380,7 @@ void TDnsCache::GHBNCallback(void* arg, int status, int, struct hostent* info) {               */              p->second.ResolvedV4 = TTimeKeeper::GetTime();              p->second.ResolvedV4 = 0; -            AtomicSet(p->second.InProgressV4, 0); +            AtomicSet(p->second.InProgressV4, 0);           } else if (info->h_addrtype == AF_INET6) {              p->second.AddrsV6.clear();              for (int i = 0; info->h_addr_list[i] != nullptr; i++) { @@ -395,7 +395,7 @@ void TDnsCache::GHBNCallback(void* arg, int status, int, struct hostent* info) {          notfound = TTimeKeeper::GetTime();          resolved = 0;      } -    AtomicSet(inprogress, 0); +    
AtomicSet(inprogress, 0);   }  void TDnsCache::GHBACallback(void* arg, int status, int, struct hostent* info) { @@ -413,7 +413,7 @@ void TDnsCache::GHBACallback(void* arg, int status, int, struct hostent* info) {          p->second.NotFound = TTimeKeeper::GetTime();          p->second.Resolved = 0;      } -    AtomicSet(p->second.InProgress, 0); +    AtomicSet(p->second.InProgress, 0);   }  TString TDnsCache::THost::AddrsV4ToString() const { @@ -441,5 +441,5 @@ TString TDnsCache::THost::AddrsV6ToString() const {      }      return ss.Str();  } - -TDnsCache::TAresLibInit TDnsCache::InitAresLib; +  +TDnsCache::TAresLibInit TDnsCache::InitAresLib;  diff --git a/library/cpp/actors/dnscachelib/dnscache.h b/library/cpp/actors/dnscachelib/dnscache.h index 3313a251a1c..586957b9a09 100644 --- a/library/cpp/actors/dnscachelib/dnscache.h +++ b/library/cpp/actors/dnscachelib/dnscache.h @@ -1,6 +1,6 @@  #pragma once -#include <contrib/libs/c-ares/ares.h> +#include <contrib/libs/c-ares/ares.h>   #include <util/generic/map.h>  #include <util/generic/vector.h>  #include <util/network/address.h> @@ -28,9 +28,9 @@ public:      /* use with AF_INET, AF_INET6 or AF_UNSPEC */      NAddr::IRemoteAddrPtr GetAddr(const TString& host, -                                  int family, -                                  TIpPort port = 0, -                                  bool cacheOnly = false); +                                  int family,  +                                  TIpPort port = 0,  +                                  bool cacheOnly = false);       void GetAllAddresses(const TString& host, TVector<NAddr::IRemoteAddrPtr>&); @@ -68,8 +68,8 @@ private:          TString AddrsV4ToString() const;          TString AddrsV6ToString() const; - -        bool IsStale(int family, const TDnsCache* ctx) const noexcept; +  +        bool IsStale(int family, const TDnsCache* ctx) const noexcept;       };      typedef TMap<TString, THost> THostCache; @@ -99,9 +99,9 @@ private:      typedef 
TMap<in6_addr, TAddr, TAddrCmp> TAddrCache;      const THost& Resolve(const TString&, int family, bool cacheOnly = false); - +       const TAddr& ResolveAddr(const in6_addr&, int family); - +       void WaitTask(TAtomic&);      static void GHBNCallback(void* arg, int status, int timeouts, @@ -128,21 +128,21 @@ private:      TMutex AresMtx;      void* Channel; - -    struct TAresLibInit { -        TAresLibInit() { +  +    struct TAresLibInit {  +        TAresLibInit() {   #ifdef _win_ -            const auto res = ares_library_init(ARES_LIB_INIT_ALL); -            Y_VERIFY(res == 0); +            const auto res = ares_library_init(ARES_LIB_INIT_ALL);  +            Y_VERIFY(res == 0);   #endif -        } - -        ~TAresLibInit() { +        }  +  +        ~TAresLibInit() {   #ifdef _win_ -            ares_library_cleanup(); +            ares_library_cleanup();   #endif -        } -    }; - -    static TAresLibInit InitAresLib; +        }  +    };  +  +    static TAresLibInit InitAresLib;   }; diff --git a/library/cpp/actors/memory_log/memlog.cpp b/library/cpp/actors/memory_log/memlog.cpp index 8e6b46727d6..f20162db70f 100644 --- a/library/cpp/actors/memory_log/memlog.cpp +++ b/library/cpp/actors/memory_log/memlog.cpp @@ -1,28 +1,28 @@ -#include "memlog.h" - +#include "memlog.h"  +   #include <library/cpp/actors/util/datetime.h> -#include <util/system/info.h> -#include <util/system/atomic.h> -#include <util/system/align.h> - -#include <contrib/libs/linuxvdso/interface.h> - -#if (defined(_i386_) || defined(_x86_64_)) && defined(_linux_) -#define HAVE_VDSO_GETCPU 1 -#include <contrib/libs/linuxvdso/interface.h> -static int (*FastGetCpu)(unsigned* cpu, unsigned* node, void* unused); -#endif - -#if defined(_unix_) +#include <util/system/info.h>  +#include <util/system/atomic.h>  +#include <util/system/align.h>  +  +#include <contrib/libs/linuxvdso/interface.h>  +  +#if (defined(_i386_) || defined(_x86_64_)) && defined(_linux_)  +#define HAVE_VDSO_GETCPU 1  +#include 
<contrib/libs/linuxvdso/interface.h>  +static int (*FastGetCpu)(unsigned* cpu, unsigned* node, void* unused);  +#endif  +  +#if defined(_unix_)   #include <sched.h> -#elif defined(_win_) +#elif defined(_win_)   #include <WinBase.h> -#else +#else   #error NO IMPLEMENTATION FOR THE PLATFORM -#endif - -const char TMemoryLog::DEFAULT_LAST_MARK[16] = { +#endif  +  +const char TMemoryLog::DEFAULT_LAST_MARK[16] = {       'c',      'b',      '7', @@ -39,9 +39,9 @@ const char TMemoryLog::DEFAULT_LAST_MARK[16] = {      '4',      '5',      '\n', -}; - -const char TMemoryLog::CLEAR_MARK[16] = { +};  +  +const char TMemoryLog::CLEAR_MARK[16] = {       ' ',      ' ',      ' ', @@ -58,146 +58,146 @@ const char TMemoryLog::CLEAR_MARK[16] = {      ' ',      ' ',      '\n', -}; - -unsigned TMemoryLog::GetSelfCpu() noexcept { -#if defined(_unix_) +};  +  +unsigned TMemoryLog::GetSelfCpu() noexcept {  +#if defined(_unix_)   #if HAVE_VDSO_GETCPU -    unsigned cpu; -    if (Y_LIKELY(FastGetCpu != nullptr)) { -        auto result = FastGetCpu(&cpu, nullptr, nullptr); -        Y_VERIFY(result == 0); +    unsigned cpu;  +    if (Y_LIKELY(FastGetCpu != nullptr)) {  +        auto result = FastGetCpu(&cpu, nullptr, nullptr);  +        Y_VERIFY(result == 0);           return cpu; -    } else { -        return 0; -    } - +    } else {  +        return 0;  +    }  +   #elif defined(_x86_64_) || defined(_i386_) - +   #define CPUID(func, eax, ebx, ecx, edx)              \      __asm__ __volatile__(                            \          "cpuid"                                      \          : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) \          : "a"(func)); - -    int a = 0, b = 0, c = 0, d = 0; -    CPUID(0x1, a, b, c, d); -    int acpiID = (b >> 24); -    return acpiID; - +  +    int a = 0, b = 0, c = 0, d = 0;  +    CPUID(0x1, a, b, c, d);  +    int acpiID = (b >> 24);  +    return acpiID;  +   #elif defined(__CNUC__) -    return sched_getcpu(); +    return sched_getcpu();   #else -    
return 0; +    return 0;   #endif - -#elif defined(_win_) -    return GetCurrentProcessorNumber(); -#else -    return 0; -#endif -} - -TMemoryLog* TMemoryLog::MemLogBuffer = nullptr; +  +#elif defined(_win_)  +    return GetCurrentProcessorNumber();  +#else  +    return 0;  +#endif  +}  +  +TMemoryLog* TMemoryLog::MemLogBuffer = nullptr;   Y_POD_THREAD(TThread::TId)  TMemoryLog::LogThreadId; -char* TMemoryLog::LastMarkIsHere = nullptr; - -std::atomic<bool> TMemoryLog::PrintLastMark(true); - +char* TMemoryLog::LastMarkIsHere = nullptr;  +  +std::atomic<bool> TMemoryLog::PrintLastMark(true);  +   TMemoryLog::TMemoryLog(size_t totalSize, size_t grainSize)      : GrainSize(grainSize)      , FreeGrains(DEFAULT_TOTAL_SIZE / DEFAULT_GRAIN_SIZE * 2)      , Buf(totalSize) -{ -    Y_VERIFY(DEFAULT_TOTAL_SIZE % DEFAULT_GRAIN_SIZE == 0); -    NumberOfGrains = DEFAULT_TOTAL_SIZE / DEFAULT_GRAIN_SIZE; - -    for (size_t i = 0; i < NumberOfGrains; ++i) { -        new (GetGrain(i)) TGrain; -    } - -    NumberOfCpus = NSystemInfo::NumberOfCpus(); -    Y_VERIFY(NumberOfGrains > NumberOfCpus); -    ActiveGrains.Reset(new TGrain*[NumberOfCpus]); -    for (size_t i = 0; i < NumberOfCpus; ++i) { -        ActiveGrains[i] = GetGrain(i); -    } - -    for (size_t i = NumberOfCpus; i < NumberOfGrains; ++i) { -        FreeGrains.StubbornPush(GetGrain(i)); -    } - -#if HAVE_VDSO_GETCPU -    auto vdsoFunc = (decltype(FastGetCpu)) -        NVdso::Function("__vdso_getcpu", "LINUX_2.6"); -    AtomicSet(FastGetCpu, vdsoFunc); -#endif -} - -void* TMemoryLog::GetWriteBuffer(size_t amount) noexcept { -    // alignment required by NoCacheMemcpy -    amount = AlignUp<size_t>(amount, MemcpyAlignment); - -    for (ui16 tries = MAX_GET_BUFFER_TRIES; tries-- > 0;) { -        auto myCpu = GetSelfCpu(); - -        TGrain* grain = AtomicGet(ActiveGrains[myCpu]); - -        if (grain != nullptr) { -            auto mine = AtomicGetAndAdd(grain->WritePointer, amount); -            if (mine + amount <= 
GrainSize - sizeof(TGrain)) { -                return &grain->Data[mine]; -            } - -            if (!AtomicCas(&ActiveGrains[myCpu], 0, grain)) { -                continue; -            } - -            FreeGrains.StubbornPush(grain); -        } - -        grain = (TGrain*)FreeGrains.Pop(); - -        if (grain == nullptr) { -            return nullptr; -        } - -        grain->WritePointer = 0; - -        if (!AtomicCas(&ActiveGrains[myCpu], grain, 0)) { -            FreeGrains.StubbornPush(grain); -            continue; -        } -    } - -    return nullptr; -} - -void ClearAlignedTail(char* tail) noexcept { -    auto aligned = AlignUp(tail, TMemoryLog::MemcpyAlignment); -    if (aligned > tail) { -        memset(tail, 0, aligned - tail); -    } -} - -#if defined(_x86_64_) || defined(_i386_) -#include <xmmintrin.h> -// the main motivation is not poluting CPU cache -NO_SANITIZE_THREAD -void NoCacheMemcpy(char* dst, const char* src, size_t size) noexcept { -    while (size >= sizeof(__m128) * 2) { -        __m128 a = _mm_load_ps((float*)(src + 0 * sizeof(__m128))); -        __m128 b = _mm_load_ps((float*)(src + 1 * sizeof(__m128))); -        _mm_stream_ps((float*)(dst + 0 * sizeof(__m128)), a); -        _mm_stream_ps((float*)(dst + 1 * sizeof(__m128)), b); - -        size -= sizeof(__m128) * 2; -        src += sizeof(__m128) * 2; -        dst += sizeof(__m128) * 2; -    } -    memcpy(dst, src, size); -} +{  +    Y_VERIFY(DEFAULT_TOTAL_SIZE % DEFAULT_GRAIN_SIZE == 0);  +    NumberOfGrains = DEFAULT_TOTAL_SIZE / DEFAULT_GRAIN_SIZE;  +  +    for (size_t i = 0; i < NumberOfGrains; ++i) {  +        new (GetGrain(i)) TGrain;  +    }  +  +    NumberOfCpus = NSystemInfo::NumberOfCpus();  +    Y_VERIFY(NumberOfGrains > NumberOfCpus);  +    ActiveGrains.Reset(new TGrain*[NumberOfCpus]);  +    for (size_t i = 0; i < NumberOfCpus; ++i) {  +        ActiveGrains[i] = GetGrain(i);  +    }  +  +    for (size_t i = NumberOfCpus; i < NumberOfGrains; ++i) {  +        
FreeGrains.StubbornPush(GetGrain(i));  +    }  +  +#if HAVE_VDSO_GETCPU  +    auto vdsoFunc = (decltype(FastGetCpu))  +        NVdso::Function("__vdso_getcpu", "LINUX_2.6");  +    AtomicSet(FastGetCpu, vdsoFunc);  +#endif  +}  +  +void* TMemoryLog::GetWriteBuffer(size_t amount) noexcept {  +    // alignment required by NoCacheMemcpy  +    amount = AlignUp<size_t>(amount, MemcpyAlignment);  +  +    for (ui16 tries = MAX_GET_BUFFER_TRIES; tries-- > 0;) {  +        auto myCpu = GetSelfCpu();  +  +        TGrain* grain = AtomicGet(ActiveGrains[myCpu]);  +  +        if (grain != nullptr) {  +            auto mine = AtomicGetAndAdd(grain->WritePointer, amount);  +            if (mine + amount <= GrainSize - sizeof(TGrain)) {  +                return &grain->Data[mine];  +            }  +  +            if (!AtomicCas(&ActiveGrains[myCpu], 0, grain)) {  +                continue;  +            }  +  +            FreeGrains.StubbornPush(grain);  +        }  +  +        grain = (TGrain*)FreeGrains.Pop();  +  +        if (grain == nullptr) {  +            return nullptr;  +        }  +  +        grain->WritePointer = 0;  +  +        if (!AtomicCas(&ActiveGrains[myCpu], grain, 0)) {  +            FreeGrains.StubbornPush(grain);  +            continue;  +        }  +    }  +  +    return nullptr;  +}  +  +void ClearAlignedTail(char* tail) noexcept {  +    auto aligned = AlignUp(tail, TMemoryLog::MemcpyAlignment);  +    if (aligned > tail) {  +        memset(tail, 0, aligned - tail);  +    }  +}  +  +#if defined(_x86_64_) || defined(_i386_)  +#include <xmmintrin.h>  +// the main motivation is not poluting CPU cache  +NO_SANITIZE_THREAD  +void NoCacheMemcpy(char* dst, const char* src, size_t size) noexcept {  +    while (size >= sizeof(__m128) * 2) {  +        __m128 a = _mm_load_ps((float*)(src + 0 * sizeof(__m128)));  +        __m128 b = _mm_load_ps((float*)(src + 1 * sizeof(__m128)));  +        _mm_stream_ps((float*)(dst + 0 * sizeof(__m128)), a);  +        
_mm_stream_ps((float*)(dst + 1 * sizeof(__m128)), b);  +  +        size -= sizeof(__m128) * 2;  +        src += sizeof(__m128) * 2;  +        dst += sizeof(__m128) * 2;  +    }  +    memcpy(dst, src, size);  +}   NO_SANITIZE_THREAD  void NoWCacheMemcpy(char* dst, const char* src, size_t size) noexcept { @@ -224,144 +224,144 @@ void NoWCacheMemcpy(char* dst, const char* src, size_t size) noexcept {      }  } -#endif - -NO_SANITIZE_THREAD -char* BareMemLogWrite(const char* begin, size_t msgSize, bool isLast) noexcept { -    bool lastMark = -        isLast && TMemoryLog::PrintLastMark.load(std::memory_order_acquire); -    size_t amount = lastMark ? msgSize + TMemoryLog::LAST_MARK_SIZE : msgSize; - -    char* buffer = (char*)TMemoryLog::GetWriteBufferStatic(amount); -    if (buffer == nullptr) { -        return nullptr; -    } - -#if defined(_x86_64_) || defined(_i386_) -    if (AlignDown(begin, TMemoryLog::MemcpyAlignment) == begin) { -        NoCacheMemcpy(buffer, begin, msgSize); +#endif  +  +NO_SANITIZE_THREAD  +char* BareMemLogWrite(const char* begin, size_t msgSize, bool isLast) noexcept {  +    bool lastMark =  +        isLast && TMemoryLog::PrintLastMark.load(std::memory_order_acquire);  +    size_t amount = lastMark ? 
msgSize + TMemoryLog::LAST_MARK_SIZE : msgSize;  +  +    char* buffer = (char*)TMemoryLog::GetWriteBufferStatic(amount);  +    if (buffer == nullptr) {  +        return nullptr;  +    }  +  +#if defined(_x86_64_) || defined(_i386_)  +    if (AlignDown(begin, TMemoryLog::MemcpyAlignment) == begin) {  +        NoCacheMemcpy(buffer, begin, msgSize);       } else {          NoWCacheMemcpy(buffer, begin, msgSize);      }  #else      memcpy(buffer, begin, msgSize);  #endif - -    if (lastMark) { -        TMemoryLog::ChangeLastMark(buffer + msgSize); -    } - -    ClearAlignedTail(buffer + amount); -    return buffer; -} - -NO_SANITIZE_THREAD -bool MemLogWrite(const char* begin, size_t msgSize, bool addLF) noexcept { -    bool lastMark = TMemoryLog::PrintLastMark.load(std::memory_order_acquire); -    size_t amount = lastMark ? msgSize + TMemoryLog::LAST_MARK_SIZE : msgSize; - -    // Let's construct prolog with timestamp and thread id -    auto threadId = TMemoryLog::GetTheadId(); - -    // alignment required by NoCacheMemcpy -    // check for format for snprintf -    constexpr size_t prologSize = 48; +  +    if (lastMark) {  +        TMemoryLog::ChangeLastMark(buffer + msgSize);  +    }  +  +    ClearAlignedTail(buffer + amount);  +    return buffer;  +}  +  +NO_SANITIZE_THREAD  +bool MemLogWrite(const char* begin, size_t msgSize, bool addLF) noexcept {  +    bool lastMark = TMemoryLog::PrintLastMark.load(std::memory_order_acquire);  +    size_t amount = lastMark ? 
msgSize + TMemoryLog::LAST_MARK_SIZE : msgSize;  +  +    // Let's construct prolog with timestamp and thread id  +    auto threadId = TMemoryLog::GetTheadId();  +  +    // alignment required by NoCacheMemcpy  +    // check for format for snprintf  +    constexpr size_t prologSize = 48;       alignas(TMemoryLog::MemcpyAlignment) char prolog[prologSize + 1];      Y_VERIFY(AlignDown(&prolog, TMemoryLog::MemcpyAlignment) == &prolog); - -    int snprintfResult = snprintf(prolog, prologSize + 1, +  +    int snprintfResult = snprintf(prolog, prologSize + 1,                                     "TS %020" PRIu64 " TI %020" PRIu64 " ", GetCycleCountFast(), threadId); - -    if (snprintfResult < 0) { -        return false; -    } -    Y_VERIFY(snprintfResult == prologSize); - -    amount += prologSize; -    if (addLF) { -        ++amount; // add 1 byte for \n at the end of the message -    } - -    char* buffer = (char*)TMemoryLog::GetWriteBufferStatic(amount); -    if (buffer == nullptr) { -        return false; -    } - -#if defined(_x86_64_) || defined(_i386_) +  +    if (snprintfResult < 0) {  +        return false;  +    }  +    Y_VERIFY(snprintfResult == prologSize);  +  +    amount += prologSize;  +    if (addLF) {  +        ++amount; // add 1 byte for \n at the end of the message  +    }  +  +    char* buffer = (char*)TMemoryLog::GetWriteBufferStatic(amount);  +    if (buffer == nullptr) {  +        return false;  +    }  +  +#if defined(_x86_64_) || defined(_i386_)       // warning: copy prolog first to avoid corruption of the message      // by prolog tail      NoCacheMemcpy(buffer, prolog, prologSize);      if (AlignDown(begin + prologSize, TMemoryLog::MemcpyAlignment) == begin + prologSize) {          NoCacheMemcpy(buffer + prologSize, begin, msgSize); -    } else { +    } else {           NoWCacheMemcpy(buffer + prologSize, begin, msgSize);      }  #else      memcpy(buffer, prolog, prologSize);      memcpy(buffer + prologSize, begin, msgSize);  #endif - -    if 
(addLF) { -        buffer[prologSize + msgSize] = '\n'; -    } - -    if (lastMark) { -        TMemoryLog::ChangeLastMark(buffer + prologSize + msgSize + (int)addLF); -    } - -    ClearAlignedTail(buffer + amount); -    return true; -} - -NO_SANITIZE_THREAD -void TMemoryLog::ChangeLastMark(char* buffer) noexcept { -    memcpy(buffer, DEFAULT_LAST_MARK, LAST_MARK_SIZE); -    auto oldMark = AtomicSwap(&LastMarkIsHere, buffer); -    if (Y_LIKELY(oldMark != nullptr)) { -        memcpy(oldMark, CLEAR_MARK, LAST_MARK_SIZE); -    } -    if (AtomicGet(LastMarkIsHere) != buffer) { -        memcpy(buffer, CLEAR_MARK, LAST_MARK_SIZE); -        AtomicBarrier(); -    } -} - -bool MemLogVPrintF(const char* format, va_list params) noexcept { -    auto logger = TMemoryLog::GetMemoryLogger(); -    if (logger == nullptr) { -        return false; -    } - -    auto threadId = TMemoryLog::GetTheadId(); - -    // alignment required by NoCacheMemcpy +  +    if (addLF) {  +        buffer[prologSize + msgSize] = '\n';  +    }  +  +    if (lastMark) {  +        TMemoryLog::ChangeLastMark(buffer + prologSize + msgSize + (int)addLF);  +    }  +  +    ClearAlignedTail(buffer + amount);  +    return true;  +}  +  +NO_SANITIZE_THREAD  +void TMemoryLog::ChangeLastMark(char* buffer) noexcept {  +    memcpy(buffer, DEFAULT_LAST_MARK, LAST_MARK_SIZE);  +    auto oldMark = AtomicSwap(&LastMarkIsHere, buffer);  +    if (Y_LIKELY(oldMark != nullptr)) {  +        memcpy(oldMark, CLEAR_MARK, LAST_MARK_SIZE);  +    }  +    if (AtomicGet(LastMarkIsHere) != buffer) {  +        memcpy(buffer, CLEAR_MARK, LAST_MARK_SIZE);  +        AtomicBarrier();  +    }  +}  +  +bool MemLogVPrintF(const char* format, va_list params) noexcept {  +    auto logger = TMemoryLog::GetMemoryLogger();  +    if (logger == nullptr) {  +        return false;  +    }  +  +    auto threadId = TMemoryLog::GetTheadId();  +  +    // alignment required by NoCacheMemcpy       alignas(TMemoryLog::MemcpyAlignment) char 
buf[TMemoryLog::MAX_MESSAGE_SIZE];      Y_VERIFY(AlignDown(&buf, TMemoryLog::MemcpyAlignment) == &buf); - +       int prologSize = snprintf(buf,                                TMemoryLog::MAX_MESSAGE_SIZE - 2,                                "TS %020" PRIu64 " TI %020" PRIu64 " ",                                GetCycleCountFast(),                                threadId); - -    if (Y_UNLIKELY(prologSize < 0)) { -        return false; -    } -    Y_VERIFY((ui32)prologSize <= TMemoryLog::MAX_MESSAGE_SIZE); - -    int add = vsnprintf( +  +    if (Y_UNLIKELY(prologSize < 0)) {  +        return false;  +    }  +    Y_VERIFY((ui32)prologSize <= TMemoryLog::MAX_MESSAGE_SIZE);  +  +    int add = vsnprintf(           &buf[prologSize], -        TMemoryLog::MAX_MESSAGE_SIZE - prologSize - 2, -        format, params); - -    if (Y_UNLIKELY(add < 0)) { -        return false; -    } -    Y_VERIFY(add >= 0); -    auto totalSize = prologSize + add; - +        TMemoryLog::MAX_MESSAGE_SIZE - prologSize - 2,  +        format, params);  +  +    if (Y_UNLIKELY(add < 0)) {  +        return false;  +    }  +    Y_VERIFY(add >= 0);  +    auto totalSize = prologSize + add;  +       buf[totalSize++] = '\n'; -    Y_VERIFY((ui32)totalSize <= TMemoryLog::MAX_MESSAGE_SIZE); - +    Y_VERIFY((ui32)totalSize <= TMemoryLog::MAX_MESSAGE_SIZE);  +       return BareMemLogWrite(buf, totalSize) != nullptr; -} +}  diff --git a/library/cpp/actors/memory_log/memlog.h b/library/cpp/actors/memory_log/memlog.h index 2aa27272a62..fe66efc4fb6 100644 --- a/library/cpp/actors/memory_log/memlog.h +++ b/library/cpp/actors/memory_log/memlog.h @@ -1,211 +1,211 @@ -#pragma once - +#pragma once  +   #include <library/cpp/threading/queue/mpmc_unordered_ring.h>  #include <util/generic/string.h> -#include <util/string/printf.h> -#include <util/system/datetime.h> -#include <util/system/thread.h> -#include <util/system/types.h> -#include <util/system/atomic.h> -#include <util/system/align.h> -#include <util/system/tls.h> - 
-#include <atomic> -#include <cstdio> - -#ifdef _win_ -#include <util/system/winint.h> -#endif - -#ifndef NO_SANITIZE_THREAD +#include <util/string/printf.h>  +#include <util/system/datetime.h>  +#include <util/system/thread.h>  +#include <util/system/types.h>  +#include <util/system/atomic.h>  +#include <util/system/align.h>  +#include <util/system/tls.h>  +  +#include <atomic>  +#include <cstdio>  +  +#ifdef _win_  +#include <util/system/winint.h>  +#endif  +  +#ifndef NO_SANITIZE_THREAD   #define NO_SANITIZE_THREAD  #if defined(__has_feature)  #if __has_feature(thread_sanitizer)  #undef NO_SANITIZE_THREAD  #define NO_SANITIZE_THREAD __attribute__((no_sanitize_thread)) +#endif   #endif  #endif -#endif - -class TMemoryLog { -public: -    static constexpr size_t DEFAULT_TOTAL_SIZE = 10 * 1024 * 1024; -    static constexpr size_t DEFAULT_GRAIN_SIZE = 1024 * 64; -    static constexpr size_t MAX_MESSAGE_SIZE = 1024; -    static constexpr ui16 MAX_GET_BUFFER_TRIES = 4; -    static constexpr ui16 MemcpyAlignment = 16; - -    // search for cb7B68a8A561645 -    static const char DEFAULT_LAST_MARK[16]; -    static const char CLEAR_MARK[16]; - -    static constexpr size_t LAST_MARK_SIZE = sizeof(DEFAULT_LAST_MARK); - -    inline static TMemoryLog* GetMemoryLogger() noexcept { -        return AtomicGet(MemLogBuffer); -    } - +  +class TMemoryLog {  +public:  +    static constexpr size_t DEFAULT_TOTAL_SIZE = 10 * 1024 * 1024;  +    static constexpr size_t DEFAULT_GRAIN_SIZE = 1024 * 64;  +    static constexpr size_t MAX_MESSAGE_SIZE = 1024;  +    static constexpr ui16 MAX_GET_BUFFER_TRIES = 4;  +    static constexpr ui16 MemcpyAlignment = 16;  +  +    // search for cb7B68a8A561645  +    static const char DEFAULT_LAST_MARK[16];  +    static const char CLEAR_MARK[16];  +  +    static constexpr size_t LAST_MARK_SIZE = sizeof(DEFAULT_LAST_MARK);  +  +    inline static TMemoryLog* GetMemoryLogger() noexcept {  +        return AtomicGet(MemLogBuffer);  +    }  +       void* 
GetWriteBuffer(size_t amount) noexcept; - -    inline static void* GetWriteBufferStatic(size_t amount) noexcept { -        auto logger = GetMemoryLogger(); -        if (logger == nullptr) { -            return nullptr; -        } -        return logger->GetWriteBuffer(amount); -    } - -    size_t GetGlobalBufferSize() const noexcept { -        return Buf.GetSize(); -    } - -    inline static void CreateMemoryLogBuffer( +  +    inline static void* GetWriteBufferStatic(size_t amount) noexcept {  +        auto logger = GetMemoryLogger();  +        if (logger == nullptr) {  +            return nullptr;  +        }  +        return logger->GetWriteBuffer(amount);  +    }  +  +    size_t GetGlobalBufferSize() const noexcept {  +        return Buf.GetSize();  +    }  +  +    inline static void CreateMemoryLogBuffer(           size_t totalSize = DEFAULT_TOTAL_SIZE,          size_t grainSize = DEFAULT_GRAIN_SIZE)          Y_COLD { -        if (AtomicGet(MemLogBuffer) != nullptr) { -            return; -        } - -        AtomicSet(MemLogBuffer, new TMemoryLog(totalSize, grainSize)); -    } - -    static std::atomic<bool> PrintLastMark; - -    // buffer must be at least 16 bytes +        if (AtomicGet(MemLogBuffer) != nullptr) {  +            return;  +        }  +  +        AtomicSet(MemLogBuffer, new TMemoryLog(totalSize, grainSize));  +    }  +  +    static std::atomic<bool> PrintLastMark;  +  +    // buffer must be at least 16 bytes       static void ChangeLastMark(char* buffer) noexcept; - -    inline static TThread::TId GetTheadId() noexcept { -        if (LogThreadId == 0) { -            LogThreadId = TThread::CurrentThreadId(); -        } -        return LogThreadId; -    } - -private: +  +    inline static TThread::TId GetTheadId() noexcept {  +        if (LogThreadId == 0) {  +            LogThreadId = TThread::CurrentThreadId();  +        }  +        return LogThreadId;  +    }  +  +private:       TMemoryLog(size_t totalSize, size_t grainSize) Y_COLD; - -    
struct TGrain { -        TAtomic WritePointer = 0; -        char Padding[MemcpyAlignment - sizeof(TAtomic)]; -        char Data[]; -    }; - -    size_t NumberOfCpus; -    size_t GrainSize; -    size_t NumberOfGrains; -    TArrayPtr<TGrain*> ActiveGrains; -    NThreading::TMPMCUnorderedRing FreeGrains; - -    TGrain* GetGrain(size_t grainIndex) const noexcept { -        return (TGrain*)((char*)GetGlobalBuffer() + GrainSize * grainIndex); -    } - -    class TMMapArea { -    public: -        TMMapArea(size_t amount) Y_COLD { -            MMap(amount); -        } - -        TMMapArea(const TMMapArea&) = delete; -        TMMapArea& operator=(const TMMapArea& copy) = delete; - -        TMMapArea(TMMapArea&& move) Y_COLD { -            BufPtr = move.BufPtr; -            Size = move.Size; - -            move.BufPtr = nullptr; -            move.Size = 0; -        } - -        TMMapArea& operator=(TMMapArea&& move) Y_COLD { -            BufPtr = move.BufPtr; -            Size = move.Size; - -            move.BufPtr = nullptr; -            move.Size = 0; -            return *this; -        } - -        void Reset(size_t amount) Y_COLD { -            MUnmap(); -            MMap(amount); -        } - -        ~TMMapArea() noexcept Y_COLD { -            MUnmap(); -        } - -        size_t GetSize() const noexcept { -            return Size; -        } - -        void* GetPtr() const noexcept { -            return BufPtr; -        } - -    private: -        void* BufPtr; -        size_t Size; -#ifdef _win_ -        HANDLE Mapping; -#endif - -        void MMap(size_t amount); -        void MUnmap(); -    }; - -    TMMapArea Buf; - -    void* GetGlobalBuffer() const noexcept { -        return Buf.GetPtr(); -    } - -    static unsigned GetSelfCpu() noexcept; - -    static TMemoryLog* MemLogBuffer; -    static Y_POD_THREAD(TThread::TId) LogThreadId; -    static char* LastMarkIsHere; -}; - -// it's no use of sanitizing this function -NO_SANITIZE_THREAD +  +    struct TGrain {  + 
       TAtomic WritePointer = 0;  +        char Padding[MemcpyAlignment - sizeof(TAtomic)];  +        char Data[];  +    };  +  +    size_t NumberOfCpus;  +    size_t GrainSize;  +    size_t NumberOfGrains;  +    TArrayPtr<TGrain*> ActiveGrains;  +    NThreading::TMPMCUnorderedRing FreeGrains;  +  +    TGrain* GetGrain(size_t grainIndex) const noexcept {  +        return (TGrain*)((char*)GetGlobalBuffer() + GrainSize * grainIndex);  +    }  +  +    class TMMapArea {  +    public:  +        TMMapArea(size_t amount) Y_COLD {  +            MMap(amount);  +        }  +  +        TMMapArea(const TMMapArea&) = delete;  +        TMMapArea& operator=(const TMMapArea& copy) = delete;  +  +        TMMapArea(TMMapArea&& move) Y_COLD {  +            BufPtr = move.BufPtr;  +            Size = move.Size;  +  +            move.BufPtr = nullptr;  +            move.Size = 0;  +        }  +  +        TMMapArea& operator=(TMMapArea&& move) Y_COLD {  +            BufPtr = move.BufPtr;  +            Size = move.Size;  +  +            move.BufPtr = nullptr;  +            move.Size = 0;  +            return *this;  +        }  +  +        void Reset(size_t amount) Y_COLD {  +            MUnmap();  +            MMap(amount);  +        }  +  +        ~TMMapArea() noexcept Y_COLD {  +            MUnmap();  +        }  +  +        size_t GetSize() const noexcept {  +            return Size;  +        }  +  +        void* GetPtr() const noexcept {  +            return BufPtr;  +        }  +  +    private:  +        void* BufPtr;  +        size_t Size;  +#ifdef _win_  +        HANDLE Mapping;  +#endif  +  +        void MMap(size_t amount);  +        void MUnmap();  +    };  +  +    TMMapArea Buf;  +  +    void* GetGlobalBuffer() const noexcept {  +        return Buf.GetPtr();  +    }  +  +    static unsigned GetSelfCpu() noexcept;  +  +    static TMemoryLog* MemLogBuffer;  +    static Y_POD_THREAD(TThread::TId) LogThreadId;  +    static char* LastMarkIsHere;  +};  +  +// it's no use of 
sanitizing this function  +NO_SANITIZE_THREAD   char* BareMemLogWrite( -    const char* begin, size_t msgSize, bool isLast = true) noexcept; - -// it's no use of sanitizing this function -NO_SANITIZE_THREAD +    const char* begin, size_t msgSize, bool isLast = true) noexcept;  +  +// it's no use of sanitizing this function  +NO_SANITIZE_THREAD   bool MemLogWrite( -    const char* begin, size_t msgSize, bool addLF = false) noexcept; - -Y_WRAPPER inline bool MemLogWrite(const char* begin, const char* end) noexcept { -    if (end <= begin) { -        return false; -    } - -    size_t msgSize = end - begin; -    return MemLogWrite(begin, msgSize); -} - -template <typename TObj> -bool MemLogWriteStruct(const TObj* obj) noexcept { -    auto begin = (const char*)(const void*)obj; -    return MemLogWrite(begin, begin + sizeof(TObj)); -} - +    const char* begin, size_t msgSize, bool addLF = false) noexcept;  +  +Y_WRAPPER inline bool MemLogWrite(const char* begin, const char* end) noexcept {  +    if (end <= begin) {  +        return false;  +    }  +  +    size_t msgSize = end - begin;  +    return MemLogWrite(begin, msgSize);  +}  +  +template <typename TObj>  +bool MemLogWriteStruct(const TObj* obj) noexcept {  +    auto begin = (const char*)(const void*)obj;  +    return MemLogWrite(begin, begin + sizeof(TObj));  +}  +   Y_PRINTF_FORMAT(1, 0) -bool MemLogVPrintF(const char* format, va_list params) noexcept; - +bool MemLogVPrintF(const char* format, va_list params) noexcept;  +   Y_PRINTF_FORMAT(1, 2)  Y_WRAPPER -inline bool MemLogPrintF(const char* format, ...) noexcept { -    va_list params; -    va_start(params, format); -    auto result = MemLogVPrintF(format, params); -    va_end(params); -    return result; -} - -Y_WRAPPER inline bool MemLogWriteNullTerm(const char* str) noexcept { -    return MemLogWrite(str, strlen(str)); -} +inline bool MemLogPrintF(const char* format, ...) 
noexcept {  +    va_list params;  +    va_start(params, format);  +    auto result = MemLogVPrintF(format, params);  +    va_end(params);  +    return result;  +}  +  +Y_WRAPPER inline bool MemLogWriteNullTerm(const char* str) noexcept {  +    return MemLogWrite(str, strlen(str));  +}  diff --git a/library/cpp/actors/memory_log/mmap.cpp b/library/cpp/actors/memory_log/mmap.cpp index 201998d3433..b72feb1112b 100644 --- a/library/cpp/actors/memory_log/mmap.cpp +++ b/library/cpp/actors/memory_log/mmap.cpp @@ -1,63 +1,63 @@ -#include "memlog.h" - +#include "memlog.h"  +   #if defined(_unix_)  #include <sys/mman.h>  #elif defined(_win_)  #include <util/system/winint.h> -#else +#else   #error NO IMPLEMENTATION FOR THE PLATFORM -#endif - -void TMemoryLog::TMMapArea::MMap(size_t amount) { -    Y_VERIFY(amount > 0); - +#endif  +  +void TMemoryLog::TMMapArea::MMap(size_t amount) {  +    Y_VERIFY(amount > 0);  +  +#if defined(_unix_)  +    constexpr int mmapProt = PROT_READ | PROT_WRITE;  +#if defined(_linux_)  +    constexpr int mmapFlags = MAP_PRIVATE | MAP_ANON | MAP_POPULATE;  +#else  +    constexpr int mmapFlags = MAP_PRIVATE | MAP_ANON;  +#endif  +  +    BufPtr = ::mmap(nullptr, amount, mmapProt, mmapFlags, -1, 0);  +    if (BufPtr == MAP_FAILED) {  +        throw std::bad_alloc();  +    }  +  +#elif defined(_win_)  +    Mapping = ::CreateFileMapping(  +        (HANDLE)-1, nullptr, PAGE_READWRITE, 0, amount, nullptr);  +    if (Mapping == NULL) {  +        throw std::bad_alloc();  +    }  +    BufPtr = ::MapViewOfFile(Mapping, FILE_MAP_WRITE, 0, 0, amount);  +    if (BufPtr == NULL) {  +        throw std::bad_alloc();  +    }  +#endif  +  +    Size = amount;  +}  +  +void TMemoryLog::TMMapArea::MUnmap() {  +    if (BufPtr == nullptr) {  +        return;  +    }  +   #if defined(_unix_) -    constexpr int mmapProt = PROT_READ | PROT_WRITE; -#if defined(_linux_) -    constexpr int mmapFlags = MAP_PRIVATE | MAP_ANON | MAP_POPULATE; -#else -    constexpr int mmapFlags = 
MAP_PRIVATE | MAP_ANON; -#endif - -    BufPtr = ::mmap(nullptr, amount, mmapProt, mmapFlags, -1, 0); -    if (BufPtr == MAP_FAILED) { -        throw std::bad_alloc(); -    } - -#elif defined(_win_) -    Mapping = ::CreateFileMapping( -        (HANDLE)-1, nullptr, PAGE_READWRITE, 0, amount, nullptr); -    if (Mapping == NULL) { -        throw std::bad_alloc(); -    } -    BufPtr = ::MapViewOfFile(Mapping, FILE_MAP_WRITE, 0, 0, amount); -    if (BufPtr == NULL) { -        throw std::bad_alloc(); -    } -#endif - -    Size = amount; -} - -void TMemoryLog::TMMapArea::MUnmap() { -    if (BufPtr == nullptr) { -        return; -    } - -#if defined(_unix_) -    int result = ::munmap(BufPtr, Size); -    Y_VERIFY(result == 0); - -#elif defined(_win_) -    BOOL result = ::UnmapViewOfFile(BufPtr); -    Y_VERIFY(result != 0); - -    result = ::CloseHandle(Mapping); -    Y_VERIFY(result != 0); - -    Mapping = 0; -#endif - -    BufPtr = nullptr; -    Size = 0; -} +    int result = ::munmap(BufPtr, Size);  +    Y_VERIFY(result == 0);  +  +#elif defined(_win_)  +    BOOL result = ::UnmapViewOfFile(BufPtr);  +    Y_VERIFY(result != 0);  +  +    result = ::CloseHandle(Mapping);  +    Y_VERIFY(result != 0);  +  +    Mapping = 0;  +#endif  +  +    BufPtr = nullptr;  +    Size = 0;  +}  diff --git a/library/cpp/actors/memory_log/ya.make b/library/cpp/actors/memory_log/ya.make index d89d5db4d74..441b51b3c73 100644 --- a/library/cpp/actors/memory_log/ya.make +++ b/library/cpp/actors/memory_log/ya.make @@ -1,19 +1,19 @@ -LIBRARY() - +LIBRARY()  +   OWNER(      agri      g:kikimr  ) - -SRCS( -    memlog.cpp -    memlog.h -    mmap.cpp -) - -PEERDIR( +  +SRCS(  +    memlog.cpp  +    memlog.h  +    mmap.cpp  +)  +  +PEERDIR(       library/cpp/threading/queue -    contrib/libs/linuxvdso -) - -END() +    contrib/libs/linuxvdso  +)  +  +END()  diff --git a/library/cpp/actors/prof/tag.cpp b/library/cpp/actors/prof/tag.cpp index 9ccf03e1a97..46b53d804fe 100644 --- 
a/library/cpp/actors/prof/tag.cpp +++ b/library/cpp/actors/prof/tag.cpp @@ -1,6 +1,6 @@ -#include "tag.h" +#include "tag.h"   #include "tcmalloc.h" - +   #include <library/cpp/charset/ci_string.h>  #include <library/cpp/containers/atomizer/atomizer.h>  #include <library/cpp/malloc/api/malloc.h> @@ -13,9 +13,9 @@  #include <util/generic/singleton.h>  #include <util/generic/string.h>  #include <util/generic/vector.h> -#include <util/system/mutex.h> - -namespace NProfiling { +#include <util/system/mutex.h>  +  +namespace NProfiling {       class TStringAtoms {      private:          TMutex Mutex; @@ -59,19 +59,19 @@ namespace NProfiling {              }          }      }; - +       ui32 MakeTag(const char* s) {          return TStringAtoms::Instance().MakeTag(s);      } - +       ui32 MakeTags(const TVector<const char*>& ss) {          return TStringAtoms::Instance().MakeTags(ss);      } - +       const char* GetTag(ui32 tag) {          return TStringAtoms::Instance().GetTag(tag); -    } - +    }  +       size_t GetTagsCount() {          return TStringAtoms::Instance().GetTagsCount();      } diff --git a/library/cpp/actors/prof/tag.h b/library/cpp/actors/prof/tag.h index 357e264a229..ec4bed5b082 100644 --- a/library/cpp/actors/prof/tag.h +++ b/library/cpp/actors/prof/tag.h @@ -1,22 +1,22 @@ -#pragma once - +#pragma once  +   #include <util/generic/fwd.h> - -/* -  Common registry for tagging memory profiler. -  Register a new tag with MakeTag using a unique string. +  +/*  +  Common registry for tagging memory profiler.  +  Register a new tag with MakeTag using a unique string.     Use registered tags with SetThreadAllocTag function in allocator API. -*/ - -namespace NProfiling { +*/  +  +namespace NProfiling {       ui32 MakeTag(const char* s); - +       // Make only unique tags. Y_VERIFY inside.      
ui32 MakeTags(const TVector<const char*>& ss); - +       const char* GetTag(ui32 tag);      size_t GetTagsCount(); - +       using TSetThreadAllocTag = ui32(ui32 tag);      extern TSetThreadAllocTag* SetThreadAllocTag; @@ -31,32 +31,32 @@ namespace NProfiling {              ui32 newTag = MakeTag(tagName);              RestoreTag = SetThreadAllocTag(newTag);          } - +           TMemoryTagScope(TMemoryTagScope&& move)              : RestoreTag(move.RestoreTag)              , Released(move.Released)          {              move.Released = true;          } - +           TMemoryTagScope& operator=(TMemoryTagScope&& move) {              RestoreTag = move.RestoreTag;              Released = move.Released;              move.Released = true;              return *this;          } - +           static void Reset(ui32 tag) {              SetThreadAllocTag(tag); -        } - +        }  +           void Release() {              if (!Released) {                  SetThreadAllocTag(RestoreTag);                  Released = true;              }          } - +           ~TMemoryTagScope() {              if (!Released) {                  SetThreadAllocTag(RestoreTag); diff --git a/library/cpp/actors/prof/ut/tag_ut.cpp b/library/cpp/actors/prof/ut/tag_ut.cpp index accf3921ab1..43c56ecddcf 100644 --- a/library/cpp/actors/prof/ut/tag_ut.cpp +++ b/library/cpp/actors/prof/ut/tag_ut.cpp @@ -1,68 +1,68 @@ -#include "tag.h" - +#include "tag.h"  +   #include <library/cpp/testing/unittest/registar.h> +  +using namespace NProfiling;  +  +class TAtomTagsTest: public TTestBase {  +private:  +    UNIT_TEST_SUITE(TAtomTagsTest);  +    UNIT_TEST(Test_MakeTag);  +    UNIT_TEST(Test_Make2Tags);  +    UNIT_TEST(Test_MakeTagTwice);  +  +    UNIT_TEST(Test_MakeAndGetTag);  +  +    UNIT_TEST(Test_MakeVector);  +    UNIT_TEST_SUITE_END();  -using namespace NProfiling; - -class TAtomTagsTest: public TTestBase { -private: -    UNIT_TEST_SUITE(TAtomTagsTest); -    UNIT_TEST(Test_MakeTag); -    
UNIT_TEST(Test_Make2Tags); -    UNIT_TEST(Test_MakeTagTwice); - -    UNIT_TEST(Test_MakeAndGetTag); - -    UNIT_TEST(Test_MakeVector); -    UNIT_TEST_SUITE_END(); - -public: -    void Test_MakeTag(); -    void Test_Make2Tags(); -    void Test_MakeTagTwice(); -    void Test_MakeAndGetTag(); -    void Test_MakeVector(); -}; - -UNIT_TEST_SUITE_REGISTRATION(TAtomTagsTest); - -void TAtomTagsTest::Test_MakeTag() { -    ui32 tag = MakeTag("a tag"); -    UNIT_ASSERT(tag != 0); -} - -void TAtomTagsTest::Test_Make2Tags() { -    ui32 tag1 = MakeTag("a tag 1"); -    ui32 tag2 = MakeTag("a tag 2"); -    UNIT_ASSERT(tag1 != 0); -    UNIT_ASSERT(tag2 != 0); -    UNIT_ASSERT(tag1 != tag2); -} - -void TAtomTagsTest::Test_MakeTagTwice() { -    ui32 tag1 = MakeTag("a tag twice"); -    ui32 tag2 = MakeTag("a tag twice"); -    UNIT_ASSERT(tag1 != 0); -    UNIT_ASSERT(tag1 == tag2); -} - -void TAtomTagsTest::Test_MakeAndGetTag() { -    const char* makeStr = "tag to get"; -    ui32 tag = MakeTag(makeStr); -    const char* tagStr = GetTag(tag); -    UNIT_ASSERT_STRINGS_EQUAL(makeStr, tagStr); -} - -void TAtomTagsTest::Test_MakeVector() { +public:  +    void Test_MakeTag();  +    void Test_Make2Tags();  +    void Test_MakeTagTwice();  +    void Test_MakeAndGetTag();  +    void Test_MakeVector();  +};  +  +UNIT_TEST_SUITE_REGISTRATION(TAtomTagsTest);  +  +void TAtomTagsTest::Test_MakeTag() {  +    ui32 tag = MakeTag("a tag");  +    UNIT_ASSERT(tag != 0);  +}  +  +void TAtomTagsTest::Test_Make2Tags() {  +    ui32 tag1 = MakeTag("a tag 1");  +    ui32 tag2 = MakeTag("a tag 2");  +    UNIT_ASSERT(tag1 != 0);  +    UNIT_ASSERT(tag2 != 0);  +    UNIT_ASSERT(tag1 != tag2);  +}  +  +void TAtomTagsTest::Test_MakeTagTwice() {  +    ui32 tag1 = MakeTag("a tag twice");  +    ui32 tag2 = MakeTag("a tag twice");  +    UNIT_ASSERT(tag1 != 0);  +    UNIT_ASSERT(tag1 == tag2);  +}  +  +void TAtomTagsTest::Test_MakeAndGetTag() {  +    const char* makeStr = "tag to get";  +    ui32 tag = MakeTag(makeStr);  + 
   const char* tagStr = GetTag(tag);  +    UNIT_ASSERT_STRINGS_EQUAL(makeStr, tagStr);  +}  +  +void TAtomTagsTest::Test_MakeVector() {       TVector<const char*> strs = { -        "vector tag 0", -        "vector tag 1", -        "vector tag 3", +        "vector tag 0",  +        "vector tag 1",  +        "vector tag 3",           "vector tag 4"}; -    ui32 baseTag = MakeTags(strs); -    UNIT_ASSERT(baseTag != 0); -    for (ui32 i = 0; i < strs.size(); ++i) { -        const char* str = GetTag(baseTag + i); -        UNIT_ASSERT_STRINGS_EQUAL(str, strs[i]); -    } -} +    ui32 baseTag = MakeTags(strs);  +    UNIT_ASSERT(baseTag != 0);  +    for (ui32 i = 0; i < strs.size(); ++i) {  +        const char* str = GetTag(baseTag + i);  +        UNIT_ASSERT_STRINGS_EQUAL(str, strs[i]);  +    }  +}  diff --git a/library/cpp/actors/prof/ut/ya.make b/library/cpp/actors/prof/ut/ya.make index 47c58a8fb77..d177fbdd224 100644 --- a/library/cpp/actors/prof/ut/ya.make +++ b/library/cpp/actors/prof/ut/ya.make @@ -1,12 +1,12 @@  UNITTEST_FOR(library/cpp/actors/prof) - +   OWNER(      agri      g:kikimr  ) - -SRCS( -    tag_ut.cpp -) - -END() +  +SRCS(  +    tag_ut.cpp  +)  +  +END()  diff --git a/library/cpp/actors/prof/ya.make b/library/cpp/actors/prof/ya.make index b5e24975632..cdd3e57d1fe 100644 --- a/library/cpp/actors/prof/ya.make +++ b/library/cpp/actors/prof/ya.make @@ -1,19 +1,19 @@ -LIBRARY() - +LIBRARY()  +   OWNER(      agri      g:kikimr  ) - -SRCS( -    tag.cpp -) - -PEERDIR( +  +SRCS(  +    tag.cpp  +)  +  +PEERDIR(       library/cpp/charset      library/cpp/containers/atomizer -) - +)  +   IF (PROFILE_MEMORY_ALLOCATIONS)      CFLAGS(-DPROFILE_MEMORY_ALLOCATIONS)      PEERDIR( @@ -30,4 +30,4 @@ ELSE()      SRCS(tcmalloc_null.cpp)  ENDIF() -END() +END()  diff --git a/library/cpp/actors/protos/actors.proto b/library/cpp/actors/protos/actors.proto index 5fbd6d44ee5..5e40cbf6c2d 100644 --- a/library/cpp/actors/protos/actors.proto +++ b/library/cpp/actors/protos/actors.proto 
@@ -6,8 +6,8 @@ message TActorId {      required fixed64 RawX1 = 1;      required fixed64 RawX2 = 2;  } - -message TCallbackException { +  +message TCallbackException {       required TActorId ActorId = 1; -    required string ExceptionMessage = 2; -} +    required string ExceptionMessage = 2;  +}  diff --git a/library/cpp/actors/protos/interconnect.proto b/library/cpp/actors/protos/interconnect.proto index 2e3b0d0d15d..30a5c1bb743 100644 --- a/library/cpp/actors/protos/interconnect.proto +++ b/library/cpp/actors/protos/interconnect.proto @@ -14,7 +14,7 @@ message TEvNodeInfo {      optional string Address = 2;      optional uint32 Port = 3;  } - +   extend google.protobuf.FieldOptions {      optional string PrintName = 50376;  } @@ -43,19 +43,19 @@ message TScopeId {      optional fixed64 X2 = 2;  } -message THandshakeRequest { -    required uint64 Protocol = 1; - -    required uint64 ProgramPID = 2; -    required uint64 ProgramStartTime = 3; -    required uint64 Serial = 4; - -    required uint32 ReceiverNodeId = 5; +message THandshakeRequest {  +    required uint64 Protocol = 1;  +  +    required uint64 ProgramPID = 2;  +    required uint64 ProgramStartTime = 3;  +    required uint64 Serial = 4;  +  +    required uint32 ReceiverNodeId = 5;       required string SenderActorId = 6; - -    optional string SenderHostName = 7; -    optional string ReceiverHostName = 8; -    optional string UUID = 9; +  +    optional string SenderHostName = 7;  +    optional string ReceiverHostName = 8;  +    optional string UUID = 9;       optional TClusterUUIDs ClusterUUIDs = 13;      optional bytes Ballast = 10; @@ -72,15 +72,15 @@ message THandshakeRequest {      optional bool RequestModernFrame = 18;      optional bool RequestAuthOnly = 19; -} - -message THandshakeSuccess { -    required uint64 Protocol = 1; - -    required uint64 ProgramPID = 2; -    required uint64 ProgramStartTime = 3; -    required uint64 Serial = 4; - +}  +  +message THandshakeSuccess {  +    required uint64 
Protocol = 1;  +  +    required uint64 ProgramPID = 2;  +    required uint64 ProgramStartTime = 3;  +    required uint64 Serial = 4;  +       required string SenderActorId = 5;      optional string VersionTag = 6; @@ -94,13 +94,13 @@ message THandshakeSuccess {      optional bool UseModernFrame = 11;      optional bool AuthOnly = 12; -} - -message THandshakeReply { -    optional THandshakeSuccess Success = 1; -    optional string ErrorExplaination = 2; +}  +  +message THandshakeReply {  +    optional THandshakeSuccess Success = 1;  +    optional string ErrorExplaination = 2;       optional bool CookieCheckResult = 3; -} +}   message TEvLoadMessage {      message THop { diff --git a/library/cpp/actors/protos/services_common.proto b/library/cpp/actors/protos/services_common.proto index afa0ec0073d..99347ad37e9 100644 --- a/library/cpp/actors/protos/services_common.proto +++ b/library/cpp/actors/protos/services_common.proto @@ -7,8 +7,8 @@ enum EServiceCommon {      GLOBAL = 0;      INTERCONNECT = 1; -    TEST = 2; -    PROTOCOLS = 3; +    TEST = 2;  +    PROTOCOLS = 3;       INTERCONNECT_SPEED_TEST = 4;      INTERCONNECT_STATUS = 5;      INTERCONNECT_NETWORK = 6; diff --git a/library/cpp/actors/protos/unittests.proto b/library/cpp/actors/protos/unittests.proto index a856b0942ad..68b662b9b3e 100644 --- a/library/cpp/actors/protos/unittests.proto +++ b/library/cpp/actors/protos/unittests.proto @@ -1,17 +1,17 @@  option cc_enable_arenas = true; -message TSimple { -    required string Str1 = 1; -    optional string Str2 = 2; -    optional uint64 Number1 = 3; -} - -message TBigMessage { -    repeated TSimple Simples = 1; -    repeated string ManyStr = 2; -    optional string OneMoreStr = 3; -    optional uint64 YANumber = 4; -} +message TSimple {  +    required string Str1 = 1;  +    optional string Str2 = 2;  +    optional uint64 Number1 = 3;  +}  +  +message TBigMessage {  +    repeated TSimple Simples = 1;  +    repeated string ManyStr = 2;  +    optional string 
OneMoreStr = 3;  +    optional uint64 YANumber = 4;  +}   message TMessageWithPayload {      optional string Meta = 1; diff --git a/library/cpp/actors/testlib/test_runtime.cpp b/library/cpp/actors/testlib/test_runtime.cpp index 6fa25b99656..0459f76386e 100644 --- a/library/cpp/actors/testlib/test_runtime.cpp +++ b/library/cpp/actors/testlib/test_runtime.cpp @@ -74,7 +74,7 @@ namespace NActors {              ActorSystem->Stop();          ActorSystem.Destroy(); -        Poller.Reset(); +        Poller.Reset();       }      TTestActorRuntimeBase::TNodeDataBase::~TNodeDataBase() { @@ -909,17 +909,17 @@ namespace NActors {          case TMailboxType::Revolving:              UnlockFromExecution((TMailboxTable::TRevolvingMailbox *)mailbox, node->ExecutorPools[0], false, hint, MaxWorkers, ++revolvingCounter);              break; -        case TMailboxType::HTSwap: +        case TMailboxType::HTSwap:               UnlockFromExecution((TMailboxTable::THTSwapMailbox *)mailbox, node->ExecutorPools[0], false, hint, MaxWorkers, ++revolvingCounter); -            break; -        case TMailboxType::ReadAsFilled: +            break;  +        case TMailboxType::ReadAsFilled:               UnlockFromExecution((TMailboxTable::TReadAsFilledMailbox *)mailbox, node->ExecutorPools[0], false, hint, MaxWorkers, ++revolvingCounter); -            break; -        case TMailboxType::TinyReadAsFilled: +            break;  +        case TMailboxType::TinyReadAsFilled:               UnlockFromExecution((TMailboxTable::TTinyReadAsFilledMailbox *)mailbox, node->ExecutorPools[0], false, hint, MaxWorkers, ++revolvingCounter); -            break; +            break;           default: -            Y_FAIL("Unsupported mailbox type"); +            Y_FAIL("Unsupported mailbox type");           }          return actorId; @@ -1645,13 +1645,13 @@ namespace NActors {          setup->LocalServices = node->LocalServices;          setup->Interconnect.ProxyActors.resize(FirstNodeId + NodeCount);          const 
TActorId nameserviceId = GetNameserviceActorId(); - -        TIntrusivePtr<TInterconnectProxyCommon> common; -        common.Reset(new TInterconnectProxyCommon); -        common->NameserviceId = nameserviceId; -        common->MonCounters = interconnectCounters; +  +        TIntrusivePtr<TInterconnectProxyCommon> common;  +        common.Reset(new TInterconnectProxyCommon);  +        common->NameserviceId = nameserviceId;  +        common->MonCounters = interconnectCounters;           common->TechnicalSelfHostName = "::1"; - +           if (!UseRealThreads) {              common->Settings.DeadPeer = TDuration::Max();              common->Settings.CloseOnIdle = TDuration::Max(); @@ -1668,7 +1668,7 @@ namespace NActors {                  continue;              const ui32 peerNodeId = FirstNodeId + proxyNodeIndex; - +               IActor *proxyActor = UseRealInterconnect                  ? new TInterconnectProxyTCP(peerNodeId, common)                  : InterconnectMock.CreateProxyMock(setup->NodeId, peerNodeId, common); diff --git a/library/cpp/actors/testlib/test_runtime.h b/library/cpp/actors/testlib/test_runtime.h index 26e3b45c984..cca58766451 100644 --- a/library/cpp/actors/testlib/test_runtime.h +++ b/library/cpp/actors/testlib/test_runtime.h @@ -556,7 +556,7 @@ namespace NActors {              TIntrusivePtr<NMonitoring::TDynamicCounters> DynamicCounters;              TIntrusivePtr<NActors::NLog::TSettings> LogSettings; -            TIntrusivePtr<NInterconnect::TPollerThreads> Poller; +            TIntrusivePtr<NInterconnect::TPollerThreads> Poller;               volatile ui64* ActorSystemTimestamp;              volatile ui64* ActorSystemMonotonic;              TVector<std::pair<TActorId, TActorSetupCmd> > LocalServices; diff --git a/library/cpp/actors/util/funnel_queue.h b/library/cpp/actors/util/funnel_queue.h index 0e21e2617cd..d760252054c 100644 --- a/library/cpp/actors/util/funnel_queue.h +++ b/library/cpp/actors/util/funnel_queue.h @@ -91,62 +91,62 @@ 
protected:          delete entry;          return next;      } - -protected: -    struct TEntryIter { -        TEntry* ptr; - -        ElementType& operator*() { -            return ptr->Data; -        } - -        ElementType* operator->() { -            return &ptr->Data; -        } - -        TEntryIter& operator++() { -            ptr = AtomicGet(ptr->Next); -            return *this; -        } - -        bool operator!=(const TEntryIter& other) const { -            return ptr != other.ptr; -        } - -        bool operator==(const TEntryIter& other) const { -            return ptr == other.ptr; -        } -    }; - -    struct TConstEntryIter { -        const TEntry* ptr; - -        const ElementType& operator*() { -            return ptr->Data; -        } - -        const ElementType* operator->() { -            return &ptr->Data; -        } - -        TEntryIter& operator++() { -            ptr = AtomicGet(ptr->Next); -            return *this; -        } - -        bool operator!=(const TConstEntryIter& other) const { -            return ptr != other.ptr; -        } - -        bool operator==(const TConstEntryIter& other) const { -            return ptr == other.ptr; -        } -    }; - -public: -    using const_iterator = TConstEntryIter; -    using iterator = TEntryIter; - +  +protected:  +    struct TEntryIter {  +        TEntry* ptr;  +  +        ElementType& operator*() {  +            return ptr->Data;  +        }  +  +        ElementType* operator->() {  +            return &ptr->Data;  +        }  +  +        TEntryIter& operator++() {  +            ptr = AtomicGet(ptr->Next);  +            return *this;  +        }  +  +        bool operator!=(const TEntryIter& other) const {  +            return ptr != other.ptr;  +        }  +  +        bool operator==(const TEntryIter& other) const {  +            return ptr == other.ptr;  +        }  +    };  +  +    struct TConstEntryIter {  +        const TEntry* ptr;  +  +        const ElementType& 
operator*() {  +            return ptr->Data;  +        }  +  +        const ElementType* operator->() {  +            return &ptr->Data;  +        }  +  +        TEntryIter& operator++() {  +            ptr = AtomicGet(ptr->Next);  +            return *this;  +        }  +  +        bool operator!=(const TConstEntryIter& other) const {  +            return ptr != other.ptr;  +        }  +  +        bool operator==(const TConstEntryIter& other) const {  +            return ptr == other.ptr;  +        }  +    };  +  +public:  +    using const_iterator = TConstEntryIter;  +    using iterator = TEntryIter;  +       iterator begin() {          return {AtomicGet(Front)};      } @@ -156,7 +156,7 @@ public:      const_iterator begin() const {          return {AtomicGet(Front)};      } - +       iterator end() {          return {nullptr};      } diff --git a/library/cpp/actors/util/recentwnd.h b/library/cpp/actors/util/recentwnd.h index ba1ede6f292..29425301e40 100644 --- a/library/cpp/actors/util/recentwnd.h +++ b/library/cpp/actors/util/recentwnd.h @@ -1,28 +1,28 @@ -#pragma once +#pragma once  -#include <util/generic/deque.h> - -template <typename TElem, +#include <util/generic/deque.h>  +  +template <typename TElem,             template <typename, typename...> class TContainer = TDeque> -class TRecentWnd { -public: +class TRecentWnd {  +public:       TRecentWnd(ui32 wndSize)          : MaxWndSize_(wndSize)      {      } - -    void Push(const TElem& elem) { -        if (Window_.size() == MaxWndSize_) -            Window_.erase(Window_.begin()); -        Window_.emplace_back(elem); -    } - -    void Push(TElem&& elem) { -        if (Window_.size() == MaxWndSize_) -            Window_.erase(Window_.begin()); -        Window_.emplace_back(std::move(elem)); -    } - +  +    void Push(const TElem& elem) {  +        if (Window_.size() == MaxWndSize_)  +            Window_.erase(Window_.begin());  +        Window_.emplace_back(elem);  +    }  +  +    void Push(TElem&& elem) 
{  +        if (Window_.size() == MaxWndSize_)  +            Window_.erase(Window_.begin());  +        Window_.emplace_back(std::move(elem));  +    }  +       TElem& Last() {          return Window_.back();      } @@ -35,33 +35,33 @@ public:      ui64 Size() const {          return Window_.size();      } - -    using const_iterator = typename TContainer<TElem>::const_iterator; - +  +    using const_iterator = typename TContainer<TElem>::const_iterator;  +       const_iterator begin() {          return Window_.begin();      }      const_iterator end() {          return Window_.end();      } +  +    void Reset(ui32 wndSize = 0) {  +        Window_.clear();  +        if (wndSize != 0) {  +            MaxWndSize_ = wndSize;  +        }  +    }  +  +    void ResetWnd(ui32 wndSize) {  +        Y_VERIFY(wndSize != 0);  +        MaxWndSize_ = wndSize;  +        if (Window_.size() > MaxWndSize_) {  +            Window_.erase(Window_.begin(),  +                          Window_.begin() + Window_.size() - MaxWndSize_);  +        }  +    }  -    void Reset(ui32 wndSize = 0) { -        Window_.clear(); -        if (wndSize != 0) { -            MaxWndSize_ = wndSize; -        } -    } - -    void ResetWnd(ui32 wndSize) { -        Y_VERIFY(wndSize != 0); -        MaxWndSize_ = wndSize; -        if (Window_.size() > MaxWndSize_) { -            Window_.erase(Window_.begin(), -                          Window_.begin() + Window_.size() - MaxWndSize_); -        } -    } - -private: -    TContainer<TElem> Window_; +private:  +    TContainer<TElem> Window_;       ui32 MaxWndSize_; -}; +};  diff --git a/library/cpp/actors/util/thread.h b/library/cpp/actors/util/thread.h index d742c8c585f..d90ab745fe6 100644 --- a/library/cpp/actors/util/thread.h +++ b/library/cpp/actors/util/thread.h @@ -10,17 +10,17 @@  inline void SetCurrentThreadName(const TString& name,                                   const ui32 maxCharsFromProcessName = 8) {  #if defined(_linux_) -    // linux limits threadname by 
15 + \0 - -    TStringBuf procName(GetExecPath()); -    procName = procName.RNextTok('/'); -    procName = procName.SubStr(0, maxCharsFromProcessName); - +    // linux limits threadname by 15 + \0  +  +    TStringBuf procName(GetExecPath());  +    procName = procName.RNextTok('/');  +    procName = procName.SubStr(0, maxCharsFromProcessName);  +       TStringStream linuxName; -    linuxName << procName << "." << name; +    linuxName << procName << "." << name;       TThread::SetCurrentThreadName(linuxName.Str().data());  #else -    Y_UNUSED(maxCharsFromProcessName); +    Y_UNUSED(maxCharsFromProcessName);       TThread::SetCurrentThreadName(name.data());  #endif  } diff --git a/library/cpp/balloc/balloc.cpp b/library/cpp/balloc/balloc.cpp index fab489db4c5..ea37357c134 100644 --- a/library/cpp/balloc/balloc.cpp +++ b/library/cpp/balloc/balloc.cpp @@ -32,21 +32,21 @@ namespace NBalloc {      static void Y_FORCE_INLINE Free(void* ptr) {          if (ptr == nullptr) {              return; -        } -        TAllocHeader* allocHeader = ((TAllocHeader*)ptr) - 1; -        size_t size = allocHeader->AllocSize; +        }  +        TAllocHeader* allocHeader = ((TAllocHeader*)ptr) - 1;  +        size_t size = allocHeader->AllocSize;           const size_t signature = size & SIGNATURE_MASK;          if (Y_LIKELY(signature == ALIVE_SIGNATURE)) { -            allocHeader->AllocSize = 0; // abort later on double free +            allocHeader->AllocSize = 0; // abort later on double free   #ifdef DBG_FILL_MEMORY              memset(ptr, 0xde, size - signature);  #endif -            FreeRaw(allocHeader->Block); +            FreeRaw(allocHeader->Block);               if (NAllocStats::IsEnabled()) {                  NAllocStats::DecThreadAllocStats(size - signature);              }          } else if (signature == DISABLED_SIGNATURE) { -            LibcFree(allocHeader->Block); +            LibcFree(allocHeader->Block);           } else {              
NMalloc::AbortFromCorruptedAllocator();          } diff --git a/library/cpp/http/io/stream.cpp b/library/cpp/http/io/stream.cpp index 6689be684fc..083a531b317 100644 --- a/library/cpp/http/io/stream.cpp +++ b/library/cpp/http/io/stream.cpp @@ -145,7 +145,7 @@ public:          , HasContentLength_(false)          , ContentLength_(0)          , ContentEncoded_(false) -        , Expect100Continue_(false) +        , Expect100Continue_(false)       {          BuildInputChain();          Y_ASSERT(Input_); @@ -204,10 +204,10 @@ public:          return HasContentLength_ || ChunkedInput_;      } -    inline bool HasExpect100Continue() const noexcept { -        return Expect100Continue_; -    } - +    inline bool HasExpect100Continue() const noexcept {  +        return Expect100Continue_;  +    }  +   private:      template <class Operation>      inline size_t Perform(size_t len, const Operation& operation) { @@ -324,14 +324,14 @@ private:                      }                  }                  [[fallthrough]]; -                HEADERCMP(header, "expect") { -                    auto findContinue = [&](const TStringBuf& s) { +                HEADERCMP(header, "expect") {  +                    auto findContinue = [&](const TStringBuf& s) {                           if (strnicmp(s.data(), "100-continue", 13) == 0) { -                            Expect100Continue_ = true; -                        } -                    }; -                    ForEach(header.Value(), findContinue); -                } +                            Expect100Continue_ = true;  +                        }  +                    };  +                    ForEach(header.Value(), findContinue);  +                }                   break;              }          } @@ -386,7 +386,7 @@ private:      ui64 ContentLength_;      bool ContentEncoded_; -    bool Expect100Continue_; +    bool Expect100Continue_;   };  THttpInput::THttpInput(IInputStream* slave) @@ -452,10 +452,10 @@ bool THttpInput::HasContent() 
const noexcept {      return Impl_->HasContent();  } -bool THttpInput::HasExpect100Continue() const noexcept { -    return Impl_->HasExpect100Continue(); -} - +bool THttpInput::HasExpect100Continue() const noexcept {  +    return Impl_->HasExpect100Continue();  +}  +   class THttpOutput::TImpl {      class TSizeCalculator: public IOutputStream {      public: @@ -512,11 +512,11 @@ public:      inline ~TImpl() {      } -    inline void SendContinue() { -        Output_->Write("HTTP/1.1 100 Continue\r\n\r\n"); -        Output_->Flush(); -    } - +    inline void SendContinue() {  +        Output_->Write("HTTP/1.1 100 Continue\r\n\r\n");  +        Output_->Flush();  +    }  +       inline void Write(const void* buf, size_t len) {          if (Finished_) {              ythrow THttpException() << "can not write to finished stream"; @@ -954,10 +954,10 @@ bool THttpOutput::CanBeKeepAlive() const noexcept {      return Impl_->CanBeKeepAlive();  } -void THttpOutput::SendContinue() { -    Impl_->SendContinue(); -} - +void THttpOutput::SendContinue() {  +    Impl_->SendContinue();  +}  +   const TString& THttpOutput::FirstLine() const noexcept {      return Impl_->FirstLine();  } diff --git a/library/cpp/http/io/stream.h b/library/cpp/http/io/stream.h index 78ca4fc814c..e0846ef107e 100644 --- a/library/cpp/http/io/stream.h +++ b/library/cpp/http/io/stream.h @@ -84,8 +84,8 @@ public:      /// Returns true if Content-Length or Transfer-Encoding header received      bool HasContent() const noexcept; -    bool HasExpect100Continue() const noexcept; - +    bool HasExpect100Continue() const noexcept;  +   private:      size_t DoRead(void* buf, size_t len) override;      size_t DoSkip(size_t len) override; @@ -145,8 +145,8 @@ public:      /// не завершается после окончания транзакции.      
bool CanBeKeepAlive() const noexcept; -    void SendContinue(); - +    void SendContinue();  +       /*       * first line - response or request       */ diff --git a/library/cpp/http/server/http.cpp b/library/cpp/http/server/http.cpp index 128583bdd70..a1b70f10e1f 100644 --- a/library/cpp/http/server/http.cpp +++ b/library/cpp/http/server/http.cpp @@ -67,7 +67,7 @@ public:      THttpServer::TImpl* HttpServ_ = nullptr;      bool Reject_ = false;      TInstant LastUsed; -    TInstant AcceptMoment; +    TInstant AcceptMoment;       size_t ReceivedRequests = 0;  }; @@ -300,7 +300,7 @@ public:          ~TListenSocket() override {          } -        void OnPollEvent(TInstant) override { +        void OnPollEvent(TInstant) override {               SOCKET s = ::accept(S_, nullptr, nullptr);              if (s == INVALID_SOCKET) { @@ -589,7 +589,7 @@ void TClientConnection::OnPollEvent(TInstant now) {      }      THolder<TClientRequest> obj(HttpServ_->CreateRequest(this_)); -    AcceptMoment = now; +    AcceptMoment = now;       HttpServ_->AddRequest(obj, Reject_);  } @@ -776,10 +776,10 @@ NAddr::IRemoteAddrRef TClientRequest::GetListenerSockAddrRef() const noexcept {      return Conn_->ListenerSockAddrRef_;  } -TInstant TClientRequest::AcceptMoment() const noexcept { -    return Conn_->AcceptMoment; -} - +TInstant TClientRequest::AcceptMoment() const noexcept {  +    return Conn_->AcceptMoment;  +}  +   /*   * TRequestReplier   */ diff --git a/library/cpp/http/server/http.h b/library/cpp/http/server/http.h index b292d38f270..0b1607bfbb9 100644 --- a/library/cpp/http/server/http.h +++ b/library/cpp/http/server/http.h @@ -8,7 +8,7 @@  #include <util/memory/blob.h>  #include <util/generic/ptr.h>  #include <util/generic/vector.h> -#include <util/system/atomic.h> +#include <util/system/atomic.h>   class IThreadFactory;  class TClientRequest; @@ -90,8 +90,8 @@ public:      const IThreadPool& GetRequestQueue() const;      const IThreadPool& GetFailQueue() const; -    static 
TAtomicBase AcceptReturnsInvalidSocketCounter(); - +    static TAtomicBase AcceptReturnsInvalidSocketCounter();  +   private:      bool MaxRequestsReached() const; @@ -120,7 +120,7 @@ public:      THttpServer* HttpServ() const noexcept;      const TSocket& Socket() const noexcept;      NAddr::IRemoteAddrRef GetListenerSockAddrRef() const noexcept; -    TInstant AcceptMoment() const noexcept; +    TInstant AcceptMoment() const noexcept;       bool IsLocal() const;      bool CheckLoopback(); diff --git a/library/cpp/http/server/http_ex.cpp b/library/cpp/http/server/http_ex.cpp index e07db22bfc8..7fb63784826 100644 --- a/library/cpp/http/server/http_ex.cpp +++ b/library/cpp/http/server/http_ex.cpp @@ -27,45 +27,45 @@ bool THttpClientRequestExtension::ProcessHeaders(TBaseServerRequestData& rd, TBl      char* s = RequestString.begin(); -    enum EMethod { -        NotImplemented, -        Get, -        Post, -        Put, +    enum EMethod {  +        NotImplemented,  +        Get,  +        Post,  +        Put,           Patch,          Delete, -    }; - -    enum EMethod foundMethod; -    char* urlStart; - +    };  +  +    enum EMethod foundMethod;  +    char* urlStart;  +       if (strnicmp(s, "GET ", 4) == 0) { -        foundMethod = Get; -        urlStart = s + 4; -    } else if (strnicmp(s, "POST ", 5) == 0) { -        foundMethod = Post; -        urlStart = s + 5; -    } else if (strnicmp(s, "PUT ", 4) == 0) { -        foundMethod = Put; -        urlStart = s + 4; +        foundMethod = Get;  +        urlStart = s + 4;  +    } else if (strnicmp(s, "POST ", 5) == 0) {  +        foundMethod = Post;  +        urlStart = s + 5;  +    } else if (strnicmp(s, "PUT ", 4) == 0) {  +        foundMethod = Put;  +        urlStart = s + 4;       } else if (strnicmp(s, "PATCH ", 6) == 0) {          foundMethod = Patch;          urlStart = s + 6;      } else if (strnicmp(s, "DELETE ", 7) == 0) {          foundMethod = Delete;          urlStart = s + 7; -    } else { -        
foundMethod = NotImplemented; -    } - -    switch (foundMethod) { +    } else {  +        foundMethod = NotImplemented;  +    }  +  +    switch (foundMethod) {           case Get:          case Delete:              if (!Parse(urlStart, rd)) {                  return false;              }              break; - +           case Post:          case Put:          case Patch: @@ -91,8 +91,8 @@ bool THttpClientRequestExtension::ProcessHeaders(TBaseServerRequestData& rd, TBl              } catch (...) {                  Output() << "HTTP/1.1 400 Bad request\r\n\r\n";                  return false; -            } - +            }  +               if (!Parse(urlStart, rd)) {                  return false;              } diff --git a/library/cpp/http/server/http_ut.cpp b/library/cpp/http/server/http_ut.cpp index cc62bb988e7..4e0e6bd69dd 100644 --- a/library/cpp/http/server/http_ut.cpp +++ b/library/cpp/http/server/http_ut.cpp @@ -137,7 +137,7 @@ Y_UNIT_TEST_SUITE(THttpServerTest) {      };      static const TString CrLf = "\r\n"; - +       struct TTestRequest {          TTestRequest(ui16 port, TString content = TString())              : Port(port) @@ -145,23 +145,23 @@ Y_UNIT_TEST_SUITE(THttpServerTest) {          {          } -        void CheckContinue(TSocketInput& si) { -            if (Expect100Continue) { -                TStringStream ss; +        void CheckContinue(TSocketInput& si) {  +            if (Expect100Continue) {  +                TStringStream ss;                   TString firstLine; -                si.ReadLine(firstLine); -                for (;;) { +                si.ReadLine(firstLine);  +                for (;;) {                       TString buf; -                    si.ReadLine(buf); +                    si.ReadLine(buf);                       if (buf.size() == 0) { -                        break; -                    } -                    ss << buf << CrLf; -                } -                UNIT_ASSERT_EQUAL(firstLine, "HTTP/1.1 100 
Continue"); -            } -        } - +                        break;  +                    }  +                    ss << buf << CrLf;  +                }  +                UNIT_ASSERT_EQUAL(firstLine, "HTTP/1.1 100 Continue");  +            }  +        }  +           TString Execute() {              TSocket* s = nullptr;              THolder<TSocket> singleReqSocket; @@ -176,7 +176,7 @@ Y_UNIT_TEST_SUITE(THttpServerTest) {                  s = singleReqSocket.Get();              }              bool isPost = Type == "POST"; -            TSocketInput si(*s); +            TSocketInput si(*s);               if (UseHttpOutput) {                  TSocketOutput so(*s); @@ -194,21 +194,21 @@ Y_UNIT_TEST_SUITE(THttpServerTest) {                      } else {                          r << "Transfer-Encoding: chunked" << CrLf;                      } -                    if (Expect100Continue) { -                        r << "Expect: 100-continue" << CrLf; -                    } +                    if (Expect100Continue) {  +                        r << "Expect: 100-continue" << CrLf;  +                    }                   }                  r << CrLf;                  if (isPost) { -                    output.Write(r.Str()); -                    output.Flush(); -                    CheckContinue(si); -                    output.Write(Content); -                    output.Finish(); -                } else { -                    output.Write(r.Str()); -                    output.Finish(); +                    output.Write(r.Str());  +                    output.Flush();  +                    CheckContinue(si);  +                    output.Write(Content);  +                    output.Finish();  +                } else {  +                    output.Write(r.Str());  +                    output.Finish();                   }              } else {                  TStringStream r; @@ -222,9 +222,9 @@ Y_UNIT_TEST_SUITE(THttpServerTest) {                  if 
(EnableResponseEncoding) {                      r << "Accept-Encoding: gzip, deflate, x-gzip, x-deflate, y-lzo, y-lzf, y-lzq, y-bzip2, y-lzma" << CrLf;                  } -                if (isPost && Expect100Continue) { -                    r << "Expect: 100-continue" << CrLf; -                } +                if (isPost && Expect100Continue) {  +                    r << "Expect: 100-continue" << CrLf;  +                }                   if (isPost && ContentEncoding.size() && Content.size()) {                      r << "Content-Encoding: " << ContentEncoding << CrLf;                      TStringStream compressedContent; @@ -237,7 +237,7 @@ Y_UNIT_TEST_SUITE(THttpServerTest) {                      r << "Content-Length: " << compressedContent.Size() << CrLf;                      r << CrLf;                      s->Send(r.Data(), r.Size()); -                    CheckContinue(si); +                    CheckContinue(si);                       Hdr = r.Str();                      TString tosend = compressedContent.Str();                      s->Send(tosend.data(), tosend.size()); @@ -246,7 +246,7 @@ Y_UNIT_TEST_SUITE(THttpServerTest) {                          r << "Content-Length: " << Content.size() << CrLf;                          r << CrLf;                          s->Send(r.Data(), r.Size()); -                        CheckContinue(si); +                        CheckContinue(si);                           Hdr = r.Str();                          s->Send(Content.data(), Content.size());                      } else { @@ -286,7 +286,7 @@ Y_UNIT_TEST_SUITE(THttpServerTest) {          THolder<TSocket> KeepAlivedSocket;          bool EnableResponseEncoding = false;          TString Hdr; -        bool Expect100Continue = false; +        bool Expect100Continue = false;       };      class TFailingMtpQueue: public TSimpleThreadPool { @@ -354,10 +354,10 @@ Y_UNIT_TEST_SUITE(THttpServerTest) {                                  r.ContentEncoding = encoder;                   
               for (bool expect100Continue : trueFalse) { -                                    r.Expect100Continue = expect100Continue; +                                    r.Expect100Continue = expect100Continue;                                       TString resp = r.Execute(); -                                    UNIT_ASSERT_C(resp == res, "diff echo response for request:\n" + r.GetDescription()); -                                } +                                    UNIT_ASSERT_C(resp == res, "diff echo response for request:\n" + r.GetDescription());  +                                }                               }                          }                      } diff --git a/library/cpp/lfalloc/lf_allocX64.h b/library/cpp/lfalloc/lf_allocX64.h index fd2a906d6ff..850e3a0b4f1 100644 --- a/library/cpp/lfalloc/lf_allocX64.h +++ b/library/cpp/lfalloc/lf_allocX64.h @@ -666,7 +666,7 @@ class TLFAllocFreeList {      TNode* volatile Head;      TNode* volatile Pending; -    TAtomic PendingToFreeListCounter; +    TAtomic PendingToFreeListCounter;       TAtomic AllocCount;      void* Padding; @@ -682,10 +682,10 @@ class TLFAllocFreeList {          TNode* res;          for (res = Head; res; res = Head) {              TNode* keepNext = res->Next; -            if (DoCas(&Head, keepNext, res) == res) { -                //Y_VERIFY(keepNext == res->Next); +            if (DoCas(&Head, keepNext, res) == res) {  +                //Y_VERIFY(keepNext == res->Next);                   break; -            } +            }           }          return res;      } @@ -712,19 +712,19 @@ public:              Enqueue(&Pending, newFree);      }      Y_FORCE_INLINE void* Alloc() { -        TAtomic keepCounter = AtomicAdd(PendingToFreeListCounter, 0); +        TAtomic keepCounter = AtomicAdd(PendingToFreeListCounter, 0);           TNode* fl = Pending;          if (AtomicAdd(AllocCount, 1) == 1) { -            // No other allocs in progress. 
-            // If (keepCounter == PendingToFreeListCounter) then Pending was not freed by other threads. -            // Hence Pending is not used in any concurrent DoAlloc() atm and can be safely moved to FreeList +            // No other allocs in progress.  +            // If (keepCounter == PendingToFreeListCounter) then Pending was not freed by other threads.  +            // Hence Pending is not used in any concurrent DoAlloc() atm and can be safely moved to FreeList               if (fl && keepCounter == AtomicAdd(PendingToFreeListCounter, 0) && DoCas(&Pending, (TNode*)nullptr, fl) == fl) {                  // pick first element from Pending and return it                  void* res = fl;                  fl = fl->Next;                  // if there are other elements in Pending list, add them to main free list                  FreeList(fl); -                AtomicAdd(PendingToFreeListCounter, 1); +                AtomicAdd(PendingToFreeListCounter, 1);                   AtomicAdd(AllocCount, -1);                  return res;              } @@ -1308,7 +1308,7 @@ static void AllocThreadInfo() {  struct TAllocHeader {      uint64_t Size; -    int Tag; +    int Tag;       int Cookie;  }; @@ -1331,7 +1331,7 @@ static inline TAllocHeader* GetAllocHeader(void* p) {  PERTHREAD int AllocationTag;  extern "C" int SetThreadAllocTag(int tag) {      int prevTag = AllocationTag; -    if (tag < DBG_ALLOC_MAX_TAG && tag >= 0) { +    if (tag < DBG_ALLOC_MAX_TAG && tag >= 0) {           AllocationTag = tag;      }      return prevTag; @@ -1417,7 +1417,7 @@ static inline void SampleDeallocation(TAllocHeader* p, int sizeIdx) {  }  static inline void TrackPerTagAllocation(TAllocHeader* p, int sizeIdx) { -    if (p->Tag < DBG_ALLOC_MAX_TAG && p->Tag >= 0) { +    if (p->Tag < DBG_ALLOC_MAX_TAG && p->Tag >= 0) {           Y_ASSERT_NOBT(sizeIdx < DBG_ALLOC_NUM_SIZES);          auto& global = GlobalPerTagAllocCounters[p->Tag][sizeIdx]; @@ -1432,7 +1432,7 @@ static inline void 
TrackPerTagAllocation(TAllocHeader* p, int sizeIdx) {  }  static inline void TrackPerTagDeallocation(TAllocHeader* p, int sizeIdx) { -    if (p->Tag < DBG_ALLOC_MAX_TAG && p->Tag >= 0) { +    if (p->Tag < DBG_ALLOC_MAX_TAG && p->Tag >= 0) {           Y_ASSERT_NOBT(sizeIdx < DBG_ALLOC_NUM_SIZES);          auto& global = GlobalPerTagAllocCounters[p->Tag][sizeIdx]; @@ -1609,10 +1609,10 @@ static Y_FORCE_INLINE void LFFree(void* p) {          return;      } -#if defined(LFALLOC_DBG) -    TrackDeallocation(p, nSizeIdx); -#endif - +#if defined(LFALLOC_DBG)  +    TrackDeallocation(p, nSizeIdx);  +#endif  +   #ifdef DBG_FILL_MEMORY      memset(p, 0xfe, nSizeIdxToSize[nSizeIdx]);  #endif diff --git a/library/cpp/sse/sse.h b/library/cpp/sse/sse.h index 19bac17de0b..918a942803f 100644 --- a/library/cpp/sse/sse.h +++ b/library/cpp/sse/sse.h @@ -1,18 +1,18 @@ -#pragma once - -/* -  The header chooses appropriate SSE support. -  On Intel: SSE intrinsics -  On ARM64: translation to NEON intrinsics or software emulation +#pragma once  +  +/*  +  The header chooses appropriate SSE support.  
+  On Intel: SSE intrinsics  +  On ARM64: translation to NEON intrinsics or software emulation     On PowerPc: translation to Altivec intrinsics or software emulation -*/ +*/   /* Author: Vitaliy Manushkin <[email protected]>, Danila Kutenin <[email protected]> */ - -#include <util/system/platform.h> +  +#include <util/system/platform.h>   #if (defined(_i386_) || defined(_x86_64_)) && defined(_sse_) -#include <xmmintrin.h> -#include <emmintrin.h> +#include <xmmintrin.h>  +#include <emmintrin.h>   #include <pmmintrin.h>  #define ARCADIA_SSE  #if defined(_ssse3_) @@ -24,10 +24,10 @@  #if defined(_sse4_2_)  #include <nmmintrin.h>  #endif -#elif defined(_arm64_) -#include "sse2neon.h" +#elif defined(_arm64_)  +#include "sse2neon.h"   #define ARCADIA_SSE  #elif defined(_ppc64_)  #include "powerpc.h"  #define ARCADIA_SSE -#endif +#endif  diff --git a/library/cpp/sse/sse2neon.h b/library/cpp/sse/sse2neon.h index 695dbd3041b..af7f3ed2427 100644 --- a/library/cpp/sse/sse2neon.h +++ b/library/cpp/sse/sse2neon.h @@ -1,60 +1,60 @@ -#pragma once - -/* -  The header contains inlining code -  which translates SSE intrinsics to NEON intrinsics or software emulation. -  You are encouraged for commitments. -  Add missing intrinsics, add unittests, purify the implementation, -  merge and simplify templates. -  Warning: The code is made in deep nights, so it surely contains bugs, -  imperfections, flaws and all other kinds of errors and mistakes. -*/ -/* Author: Vitaliy Manushkin <[email protected]> */ - -#include <util/system/platform.h> +#pragma once  +  +/*  +  The header contains inlining code  +  which translates SSE intrinsics to NEON intrinsics or software emulation.  +  You are encouraged for commitments.  +  Add missing intrinsics, add unittests, purify the implementation,  +  merge and simplify templates.  +  Warning: The code is made in deep nights, so it surely contains bugs,  +  imperfections, flaws and all other kinds of errors and mistakes.  
+*/  +/* Author: Vitaliy Manushkin <[email protected]> */  +  +#include <util/system/platform.h>   #include <util/system/compiler.h> -#include <util/system/types.h> - -#if !defined(_arm64_) -#error "This header is for ARM64 (aarch64) platform only. " \ +#include <util/system/types.h>  +  +#if !defined(_arm64_)  +#error "This header is for ARM64 (aarch64) platform only. " \       "Include sse.h instead of including this header directly." -#endif - -#include <arm_neon.h> - -union __m128i { -    uint64x2_t AsUi64x2; -    int64x2_t AsSi64x2; +#endif  +  +#include <arm_neon.h>  +  +union __m128i {  +    uint64x2_t AsUi64x2;  +    int64x2_t AsSi64x2;  +  +    uint32x4_t AsUi32x4;  +    int32x4_t AsSi32x4;  +  +    uint16x8_t AsUi16x8;  +    int16x8_t AsSi16x8;  +  +    uint8x16_t AsUi8x16;  +    int8x16_t AsSi8x16;  +  +    float32x4_t AsFloat32x4;  +    float64x2_t AsFloat64x2;  +};  +  +union __m128 {  +    float32x4_t AsFloat32x4;  +    float64x2_t AsFloat64x2;       uint32x4_t AsUi32x4;      int32x4_t AsSi32x4; -    uint16x8_t AsUi16x8; -    int16x8_t AsSi16x8; +    uint64x2_t AsUi64x2;  +    int64x2_t AsSi64x2;  -    uint8x16_t AsUi8x16; -    int8x16_t AsSi8x16; - -    float32x4_t AsFloat32x4; -    float64x2_t AsFloat64x2; -}; - -union __m128 { -    float32x4_t AsFloat32x4; -    float64x2_t AsFloat64x2; - -    uint32x4_t AsUi32x4; -    int32x4_t AsSi32x4; - -    uint64x2_t AsUi64x2; -    int64x2_t AsSi64x2; - -    uint8x16_t AsUi8x16; +    uint8x16_t AsUi8x16;       int8x16_t AsSi8x16;      __m128i As128i; -}; - +};  +   typedef float64x2_t __m128d;  enum _mm_hint @@ -72,128 +72,128 @@ Y_FORCE_INLINE void _mm_prefetch(const void *p, enum _mm_hint) {      __builtin_prefetch(p);  } -template <typename TType> -struct TQType; - -template <> -struct TQType<uint8x16_t> { -    static inline uint8x16_t& As(__m128i& value) { -        return value.AsUi8x16; -    } -    static inline const uint8x16_t& As(const __m128i& value) { -        return value.AsUi8x16; -    } -}; - 
-template <> -struct TQType<int8x16_t> { -    static inline int8x16_t& As(__m128i& value) { -        return value.AsSi8x16; -    } -    static inline const int8x16_t& As(const __m128i& value) { -        return value.AsSi8x16; -    } -}; - -template <> -struct TQType<uint16x8_t> { -    static inline uint16x8_t& As(__m128i& value) { -        return value.AsUi16x8; -    } -    static inline const uint16x8_t& As(const __m128i& value) { -        return value.AsUi16x8; -    } -}; - -template <> -struct TQType<int16x8_t> { -    static inline int16x8_t& As(__m128i& value) { -        return value.AsSi16x8; -    } -    static inline const int16x8_t& As(const __m128i& value) { -        return value.AsSi16x8; -    } -}; - -template <> -struct TQType<uint32x4_t> { -    static inline uint32x4_t& As(__m128i& value) { -        return value.AsUi32x4; -    } -    static inline const uint32x4_t& As(const __m128i& value) { -        return value.AsUi32x4; -    } -}; - -template <> -struct TQType<int32x4_t> { -    static inline int32x4_t& As(__m128i& value) { -        return value.AsSi32x4; -    } -    static inline const int32x4_t& As(const __m128i& value) { -        return value.AsSi32x4; -    } -}; - -template <> -struct TQType<uint64x2_t> { -    static inline uint64x2_t& As(__m128i& value) { -        return value.AsUi64x2; -    } -    static inline const uint64x2_t& As(const __m128i& value) { -        return value.AsUi64x2; -    } -    static inline uint64x2_t& As(__m128& value) { -        return value.AsUi64x2; -    } -    static inline const uint64x2_t& As(const __m128& value) { -        return value.AsUi64x2; -    } -}; - -template <> -struct TQType<int64x2_t> { -    static inline int64x2_t& As(__m128i& value) { -        return value.AsSi64x2; -    } -    static inline const int64x2_t& As(const __m128i& value) { -        return value.AsSi64x2; -    } -}; - -template <typename TValue> -struct TBaseWrapper { -    TValue Value; - +template <typename TType>  +struct TQType;  +  
+template <>  +struct TQType<uint8x16_t> {  +    static inline uint8x16_t& As(__m128i& value) {  +        return value.AsUi8x16;  +    }  +    static inline const uint8x16_t& As(const __m128i& value) {  +        return value.AsUi8x16;  +    }  +};  +  +template <>  +struct TQType<int8x16_t> {  +    static inline int8x16_t& As(__m128i& value) {  +        return value.AsSi8x16;  +    }  +    static inline const int8x16_t& As(const __m128i& value) {  +        return value.AsSi8x16;  +    }  +};  +  +template <>  +struct TQType<uint16x8_t> {  +    static inline uint16x8_t& As(__m128i& value) {  +        return value.AsUi16x8;  +    }  +    static inline const uint16x8_t& As(const __m128i& value) {  +        return value.AsUi16x8;  +    }  +};  +  +template <>  +struct TQType<int16x8_t> {  +    static inline int16x8_t& As(__m128i& value) {  +        return value.AsSi16x8;  +    }  +    static inline const int16x8_t& As(const __m128i& value) {  +        return value.AsSi16x8;  +    }  +};  +  +template <>  +struct TQType<uint32x4_t> {  +    static inline uint32x4_t& As(__m128i& value) {  +        return value.AsUi32x4;  +    }  +    static inline const uint32x4_t& As(const __m128i& value) {  +        return value.AsUi32x4;  +    }  +};  +  +template <>  +struct TQType<int32x4_t> {  +    static inline int32x4_t& As(__m128i& value) {  +        return value.AsSi32x4;  +    }  +    static inline const int32x4_t& As(const __m128i& value) {  +        return value.AsSi32x4;  +    }  +};  +  +template <>  +struct TQType<uint64x2_t> {  +    static inline uint64x2_t& As(__m128i& value) {  +        return value.AsUi64x2;  +    }  +    static inline const uint64x2_t& As(const __m128i& value) {  +        return value.AsUi64x2;  +    }  +    static inline uint64x2_t& As(__m128& value) {  +        return value.AsUi64x2;  +    }  +    static inline const uint64x2_t& As(const __m128& value) {  +        return value.AsUi64x2;  +    }  +};  +  +template <>  +struct TQType<int64x2_t> {  +   
 static inline int64x2_t& As(__m128i& value) {  +        return value.AsSi64x2;  +    }  +    static inline const int64x2_t& As(const __m128i& value) {  +        return value.AsSi64x2;  +    }  +};  +  +template <typename TValue>  +struct TBaseWrapper {  +    TValue Value;  +       Y_FORCE_INLINE -    operator TValue&() { -        return Value; -    } - +    operator TValue&() {  +        return Value;  +    }  +       Y_FORCE_INLINE -    operator const TValue&() const { -        return Value; -    } -}; - -template <typename TOp, typename TFunc, TFunc* func, -          typename TDup, TDup* dupfunc> -struct TWrapperSingleDup: public TBaseWrapper<__m128i> { +    operator const TValue&() const {  +        return Value;  +    }  +};  +  +template <typename TOp, typename TFunc, TFunc* func,  +          typename TDup, TDup* dupfunc>  +struct TWrapperSingleDup: public TBaseWrapper<__m128i> {       Y_FORCE_INLINE -    TWrapperSingleDup(const __m128i& op, const int shift) { -        TQType<TOp>::As(Value) = func(TQType<TOp>::As(op), dupfunc(shift)); -    } -}; - -template <typename TOp, typename TFunc, TFunc* func, -          typename TDup, TDup* dupfunc> -struct TWrapperSingleNegDup: public TBaseWrapper<__m128i> { +    TWrapperSingleDup(const __m128i& op, const int shift) {  +        TQType<TOp>::As(Value) = func(TQType<TOp>::As(op), dupfunc(shift));  +    }  +};  +  +template <typename TOp, typename TFunc, TFunc* func,  +          typename TDup, TDup* dupfunc>  +struct TWrapperSingleNegDup: public TBaseWrapper<__m128i> {       Y_FORCE_INLINE -    TWrapperSingleNegDup(const __m128i& op, const int shift) { -        TQType<TOp>::As(Value) = func(TQType<TOp>::As(op), dupfunc(-shift)); -    } -}; - +    TWrapperSingleNegDup(const __m128i& op, const int shift) {  +        TQType<TOp>::As(Value) = func(TQType<TOp>::As(op), dupfunc(-shift));  +    }  +};  +   inline __m128i _mm_srl_epi16(__m128i a, __m128i count) {      __m128i res;      res.AsUi16x8 = vshlq_u16(a.AsUi16x8, 
vdupq_n_s16(-count.AsUi16x8[0])); @@ -225,16 +225,16 @@ inline __m128i _mm_srai_epi32(__m128i a, int count) {      return res;  } -using _mm_srli_epi16 = -    TWrapperSingleNegDup<uint16x8_t, decltype(vshlq_u16), vshlq_u16, -                         decltype(vdupq_n_s16), vdupq_n_s16>; -using _mm_srli_epi32 = -    TWrapperSingleNegDup<uint32x4_t, decltype(vshlq_u32), vshlq_u32, -                         decltype(vdupq_n_s32), vdupq_n_s32>; -using _mm_srli_epi64 = -    TWrapperSingleNegDup<uint64x2_t, decltype(vshlq_u64), vshlq_u64, -                         decltype(vdupq_n_s64), vdupq_n_s64>; - +using _mm_srli_epi16 =  +    TWrapperSingleNegDup<uint16x8_t, decltype(vshlq_u16), vshlq_u16,  +                         decltype(vdupq_n_s16), vdupq_n_s16>;  +using _mm_srli_epi32 =  +    TWrapperSingleNegDup<uint32x4_t, decltype(vshlq_u32), vshlq_u32,  +                         decltype(vdupq_n_s32), vdupq_n_s32>;  +using _mm_srli_epi64 =  +    TWrapperSingleNegDup<uint64x2_t, decltype(vshlq_u64), vshlq_u64,  +                         decltype(vdupq_n_s64), vdupq_n_s64>;  +   inline __m128i _mm_sll_epi16(__m128i a, __m128i count) {      __m128i res; @@ -255,57 +255,57 @@ inline __m128i _mm_sll_epi64(__m128i a, __m128i count) {      return res;  } -using _mm_slli_epi16 = -    TWrapperSingleDup<uint16x8_t, decltype(vshlq_u16), vshlq_u16, -                      decltype(vdupq_n_s16), vdupq_n_s16>; -using _mm_slli_epi32 = -    TWrapperSingleDup<uint32x4_t, decltype(vshlq_u32), vshlq_u32, -                      decltype(vdupq_n_s32), vdupq_n_s32>; -using _mm_slli_epi64 = -    TWrapperSingleDup<uint64x2_t, decltype(vshlq_u64), vshlq_u64, -                      decltype(vdupq_n_s64), vdupq_n_s64>; - -template <typename TOp, typename TFunc, TFunc* func, typename... 
TParams> -struct TWrapperDual : TBaseWrapper<__m128i> { +using _mm_slli_epi16 =  +    TWrapperSingleDup<uint16x8_t, decltype(vshlq_u16), vshlq_u16,  +                      decltype(vdupq_n_s16), vdupq_n_s16>;  +using _mm_slli_epi32 =  +    TWrapperSingleDup<uint32x4_t, decltype(vshlq_u32), vshlq_u32,  +                      decltype(vdupq_n_s32), vdupq_n_s32>;  +using _mm_slli_epi64 =  +    TWrapperSingleDup<uint64x2_t, decltype(vshlq_u64), vshlq_u64,  +                      decltype(vdupq_n_s64), vdupq_n_s64>;  +  +template <typename TOp, typename TFunc, TFunc* func, typename... TParams>  +struct TWrapperDual : TBaseWrapper<__m128i> {       Y_FORCE_INLINE -    TWrapperDual(const __m128i& op1, const __m128i& op2, TParams... params) { -        TQType<TOp>::As(Value) = (TOp) -            func(TQType<TOp>::As(op1), -                 TQType<TOp>::As(op2), -                 params...); -    } -}; - -template <typename TOp, typename TFunc, TFunc* func, typename... TParams> -struct TWrapperDualSwap : TBaseWrapper<__m128i> { +    TWrapperDual(const __m128i& op1, const __m128i& op2, TParams... params) {  +        TQType<TOp>::As(Value) = (TOp)  +            func(TQType<TOp>::As(op1),  +                 TQType<TOp>::As(op2),  +                 params...);  +    }  +};  +  +template <typename TOp, typename TFunc, TFunc* func, typename... TParams>  +struct TWrapperDualSwap : TBaseWrapper<__m128i> {       Y_FORCE_INLINE -    TWrapperDualSwap(const __m128i& op1, const __m128i& op2, TParams... params) { -        TQType<TOp>::As(Value) = -            func(TQType<TOp>::As(op2), -                 TQType<TOp>::As(op1), -                 params...); -    } -}; - +    TWrapperDualSwap(const __m128i& op1, const __m128i& op2, TParams... 
params) {  +        TQType<TOp>::As(Value) =  +            func(TQType<TOp>::As(op2),  +                 TQType<TOp>::As(op1),  +                 params...);  +    }  +};  +   template <typename TOp, typename TFunc, TFunc* func, typename TArgument = __m128>  struct TWrapperDualF : TBaseWrapper<TArgument> {      Y_FORCE_INLINE      TWrapperDualF(const TArgument& op1, const TArgument& op2) {          TQType<TOp>::As(TBaseWrapper<TArgument>::Value) = (TOp) func(TQType<TOp>::As(op1), TQType<TOp>::As(op2)); -    } -}; - -using _mm_or_si128 = TWrapperDual<uint64x2_t, decltype(vorrq_u64), vorrq_u64>; -using _mm_and_si128 = TWrapperDual<uint64x2_t, decltype(vandq_u64), vandq_u64>; -using _mm_andnot_si128 = -    TWrapperDualSwap<uint64x2_t, decltype(vbicq_u64), vbicq_u64>; +    }  +};  +  +using _mm_or_si128 = TWrapperDual<uint64x2_t, decltype(vorrq_u64), vorrq_u64>;  +using _mm_and_si128 = TWrapperDual<uint64x2_t, decltype(vandq_u64), vandq_u64>;  +using _mm_andnot_si128 =  +    TWrapperDualSwap<uint64x2_t, decltype(vbicq_u64), vbicq_u64>;   using _mm_xor_si128 = TWrapperDual<uint64x2_t, decltype(veorq_u64), veorq_u64>; - +   using _mm_add_epi8 = TWrapperDual<uint8x16_t, decltype(vaddq_u8), vaddq_u8>; -using _mm_add_epi16 = TWrapperDual<uint16x8_t, decltype(vaddq_u16), vaddq_u16>; -using _mm_add_epi32 = TWrapperDual<uint32x4_t, decltype(vaddq_u32), vaddq_u32>; -using _mm_add_epi64 = TWrapperDual<uint64x2_t, decltype(vaddq_u64), vaddq_u64>; - +using _mm_add_epi16 = TWrapperDual<uint16x8_t, decltype(vaddq_u16), vaddq_u16>;  +using _mm_add_epi32 = TWrapperDual<uint32x4_t, decltype(vaddq_u32), vaddq_u32>;  +using _mm_add_epi64 = TWrapperDual<uint64x2_t, decltype(vaddq_u64), vaddq_u64>;  +   inline __m128i _mm_madd_epi16(__m128i a, __m128i b) {      int32x4_t aLow;      int32x4_t aHigh; @@ -343,118 +343,118 @@ inline __m128i _mm_madd_epi16(__m128i a, __m128i b) {  }  using _mm_sub_epi8 = TWrapperDual<uint8x16_t, decltype(vsubq_u8), vsubq_u8>; -using _mm_sub_epi16 = 
TWrapperDual<uint16x8_t, decltype(vsubq_u16), vsubq_u16>; -using _mm_sub_epi32 = TWrapperDual<uint32x4_t, decltype(vsubq_u32), vsubq_u32>; -using _mm_sub_epi64 = TWrapperDual<uint64x2_t, decltype(vsubq_u64), vsubq_u64>; - -using _mm_unpacklo_epi8 = -    TWrapperDual<uint8x16_t, decltype(vzip1q_u8), vzip1q_u8>; -using _mm_unpackhi_epi8 = -    TWrapperDual<uint8x16_t, decltype(vzip2q_u8), vzip2q_u8>; -using _mm_unpacklo_epi16 = -    TWrapperDual<uint16x8_t, decltype(vzip1q_u16), vzip1q_u16>; -using _mm_unpackhi_epi16 = -    TWrapperDual<uint16x8_t, decltype(vzip2q_u16), vzip2q_u16>; -using _mm_unpacklo_epi32 = -    TWrapperDual<uint32x4_t, decltype(vzip1q_u32), vzip1q_u32>; -using _mm_unpackhi_epi32 = -    TWrapperDual<uint32x4_t, decltype(vzip2q_u32), vzip2q_u32>; -using _mm_unpacklo_epi64 = -    TWrapperDual<uint64x2_t, decltype(vzip1q_u64), vzip1q_u64>; -using _mm_unpackhi_epi64 = -    TWrapperDual<uint64x2_t, decltype(vzip2q_u64), vzip2q_u64>; - -using _mm_cmpeq_epi8 = -    TWrapperDual<uint8x16_t, decltype(vceqq_u8), vceqq_u8>; -using _mm_cmpeq_epi16 = -    TWrapperDual<uint16x8_t, decltype(vceqq_u16), vceqq_u16>; -using _mm_cmpeq_epi32 = -    TWrapperDual<uint32x4_t, decltype(vceqq_u32), vceqq_u32>; - -using _mm_cmpgt_epi8 = -    TWrapperDual<int8x16_t, decltype(vcgtq_s8), vcgtq_s8>; -using _mm_cmpgt_epi16 = -    TWrapperDual<int16x8_t, decltype(vcgtq_s16), vcgtq_s16>; -using _mm_cmpgt_epi32 = -    TWrapperDual<int32x4_t, decltype(vcgtq_s32), vcgtq_s32>; - -using _mm_cmplt_epi8 = -    TWrapperDual<int8x16_t, decltype(vcltq_s8), vcltq_s8>; -using _mm_cmplt_epi16 = -    TWrapperDual<int16x8_t, decltype(vcltq_s16), vcltq_s16>; -using _mm_cmplt_epi32 = -    TWrapperDual<int32x4_t, decltype(vcltq_s32), vcltq_s32>; - +using _mm_sub_epi16 = TWrapperDual<uint16x8_t, decltype(vsubq_u16), vsubq_u16>;  +using _mm_sub_epi32 = TWrapperDual<uint32x4_t, decltype(vsubq_u32), vsubq_u32>;  +using _mm_sub_epi64 = TWrapperDual<uint64x2_t, decltype(vsubq_u64), vsubq_u64>;  +  
+using _mm_unpacklo_epi8 =  +    TWrapperDual<uint8x16_t, decltype(vzip1q_u8), vzip1q_u8>;  +using _mm_unpackhi_epi8 =  +    TWrapperDual<uint8x16_t, decltype(vzip2q_u8), vzip2q_u8>;  +using _mm_unpacklo_epi16 =  +    TWrapperDual<uint16x8_t, decltype(vzip1q_u16), vzip1q_u16>;  +using _mm_unpackhi_epi16 =  +    TWrapperDual<uint16x8_t, decltype(vzip2q_u16), vzip2q_u16>;  +using _mm_unpacklo_epi32 =  +    TWrapperDual<uint32x4_t, decltype(vzip1q_u32), vzip1q_u32>;  +using _mm_unpackhi_epi32 =  +    TWrapperDual<uint32x4_t, decltype(vzip2q_u32), vzip2q_u32>;  +using _mm_unpacklo_epi64 =  +    TWrapperDual<uint64x2_t, decltype(vzip1q_u64), vzip1q_u64>;  +using _mm_unpackhi_epi64 =  +    TWrapperDual<uint64x2_t, decltype(vzip2q_u64), vzip2q_u64>;  +  +using _mm_cmpeq_epi8 =  +    TWrapperDual<uint8x16_t, decltype(vceqq_u8), vceqq_u8>;  +using _mm_cmpeq_epi16 =  +    TWrapperDual<uint16x8_t, decltype(vceqq_u16), vceqq_u16>;  +using _mm_cmpeq_epi32 =  +    TWrapperDual<uint32x4_t, decltype(vceqq_u32), vceqq_u32>;  +  +using _mm_cmpgt_epi8 =  +    TWrapperDual<int8x16_t, decltype(vcgtq_s8), vcgtq_s8>;  +using _mm_cmpgt_epi16 =  +    TWrapperDual<int16x8_t, decltype(vcgtq_s16), vcgtq_s16>;  +using _mm_cmpgt_epi32 =  +    TWrapperDual<int32x4_t, decltype(vcgtq_s32), vcgtq_s32>;  +  +using _mm_cmplt_epi8 =  +    TWrapperDual<int8x16_t, decltype(vcltq_s8), vcltq_s8>;  +using _mm_cmplt_epi16 =  +    TWrapperDual<int16x8_t, decltype(vcltq_s16), vcltq_s16>;  +using _mm_cmplt_epi32 =  +    TWrapperDual<int32x4_t, decltype(vcltq_s32), vcltq_s32>;  +   Y_FORCE_INLINE __m128i _mm_load_si128(const __m128i* ptr) { -    __m128i result; +    __m128i result;       result.AsUi64x2 = vld1q_u64((const uint64_t*)ptr); -    return result; -} - +    return result;  +}  +   Y_FORCE_INLINE __m128i _mm_loadu_si128(const __m128i* ptr) { -    __m128i result; +    __m128i result;       result.AsUi64x2 = vld1q_u64((const uint64_t*)ptr); -    return result; -} - +    return result;  +}  +   
Y_FORCE_INLINE __m128i _mm_lddqu_si128(const __m128i* ptr) {      return _mm_loadu_si128(ptr);  }  Y_FORCE_INLINE void _mm_storeu_si128(__m128i* ptr, const __m128i& op) {      vst1q_u64((uint64_t*)ptr, op.AsUi64x2); -} - +}  +   Y_FORCE_INLINE void -_mm_store_si128(__m128i* ptr, const __m128i& op) { +_mm_store_si128(__m128i* ptr, const __m128i& op) {       vst1q_u64((uint64_t*)ptr, op.AsUi64x2); -} - -template <typename TOp, typename TFunc, TFunc* func, typename... TParams> -struct TWrapperSimple : TBaseWrapper<__m128i> { +}  +  +template <typename TOp, typename TFunc, TFunc* func, typename... TParams>  +struct TWrapperSimple : TBaseWrapper<__m128i> {       Y_FORCE_INLINE -    TWrapperSimple(TParams... params) { -        TQType<TOp>::As(Value) = func(params...); -    } -}; - -template <typename TOp, typename TFunc, TFunc* func, typename... TParams> -struct TWrapperSimpleF : TBaseWrapper<__m128> { +    TWrapperSimple(TParams... params) {  +        TQType<TOp>::As(Value) = func(params...);  +    }  +};  +  +template <typename TOp, typename TFunc, TFunc* func, typename... TParams>  +struct TWrapperSimpleF : TBaseWrapper<__m128> {       Y_FORCE_INLINE -    TWrapperSimpleF(TParams... params) { -        TQType<TOp>::As(Value) = func(params...); -    } -}; - -using _mm_set1_epi8 = -    TWrapperSimple<int8x16_t, decltype(vdupq_n_s8), vdupq_n_s8, const char>; -using _mm_set1_epi16 = -    TWrapperSimple<int16x8_t, decltype(vdupq_n_s16), vdupq_n_s16, const ui16>; -using _mm_set1_epi32 = -    TWrapperSimple<int32x4_t, decltype(vdupq_n_s32), vdupq_n_s32, const ui32>; - -struct _mm_setzero_si128 : TBaseWrapper<__m128i> { +    TWrapperSimpleF(TParams... 
params) {  +        TQType<TOp>::As(Value) = func(params...);  +    }  +};  +  +using _mm_set1_epi8 =  +    TWrapperSimple<int8x16_t, decltype(vdupq_n_s8), vdupq_n_s8, const char>;  +using _mm_set1_epi16 =  +    TWrapperSimple<int16x8_t, decltype(vdupq_n_s16), vdupq_n_s16, const ui16>;  +using _mm_set1_epi32 =  +    TWrapperSimple<int32x4_t, decltype(vdupq_n_s32), vdupq_n_s32, const ui32>;  +  +struct _mm_setzero_si128 : TBaseWrapper<__m128i> {       Y_FORCE_INLINE -    _mm_setzero_si128() { -        TQType<uint64x2_t>::As(Value) = vdupq_n_u64(0); -    } -}; - -struct _mm_loadl_epi64 : TBaseWrapper<__m128i> { +    _mm_setzero_si128() {  +        TQType<uint64x2_t>::As(Value) = vdupq_n_u64(0);  +    }  +};  +  +struct _mm_loadl_epi64 : TBaseWrapper<__m128i> {       Y_FORCE_INLINE -    _mm_loadl_epi64(const __m128i* p) { +    _mm_loadl_epi64(const __m128i* p) {           uint64x1_t im = vld1_u64((const uint64_t*)p); -        TQType<uint64x2_t>::As(Value) = vcombine_u64(im, vdup_n_u64(0)); -    } -}; - -struct _mm_storel_epi64 : TBaseWrapper<__m128i> { +        TQType<uint64x2_t>::As(Value) = vcombine_u64(im, vdup_n_u64(0));  +    }  +};  +  +struct _mm_storel_epi64 : TBaseWrapper<__m128i> {       Y_FORCE_INLINE -    _mm_storel_epi64(__m128i* a, __m128i op) { +    _mm_storel_epi64(__m128i* a, __m128i op) {           vst1_u64((uint64_t*)a, vget_low_u64(op.AsUi64x2)); -    } -}; - +    }  +};  +   struct ShuffleStruct4 {      ui8 x[4];  }; @@ -470,45 +470,45 @@ _MM_SHUFFLE(ui8 x4, ui8 x3, ui8 x2, ui8 x1) {  }  Y_FORCE_INLINE __m128i -_mm_shuffle_epi32(const __m128i& op1, const ShuffleStruct4& op2) { -    __m128i result; +_mm_shuffle_epi32(const __m128i& op1, const ShuffleStruct4& op2) {  +    __m128i result;       const ui8 xi[4] = {          ui8(op2.x[0] * 4), ui8(op2.x[1] * 4),          ui8(op2.x[2] * 4), ui8(op2.x[3] * 4)      };      const uint8x16_t transform = { -        ui8(xi[0]), ui8(xi[0] + 1), ui8(xi[0] + 2), ui8(xi[0] + 3), -        ui8(xi[1]), ui8(xi[1] + 
1), ui8(xi[1] + 2), ui8(xi[1] + 3), -        ui8(xi[2]), ui8(xi[2] + 1), ui8(xi[2] + 2), ui8(xi[2] + 3), +        ui8(xi[0]), ui8(xi[0] + 1), ui8(xi[0] + 2), ui8(xi[0] + 3),  +        ui8(xi[1]), ui8(xi[1] + 1), ui8(xi[1] + 2), ui8(xi[1] + 3),  +        ui8(xi[2]), ui8(xi[2] + 1), ui8(xi[2] + 2), ui8(xi[2] + 3),           ui8(xi[3]), ui8(xi[3] + 1), ui8(xi[3] + 2), ui8(xi[3] + 3)      }; -    result.AsUi8x16 = vqtbl1q_u8(op1.AsUi8x16, transform); -    return result; -} - +    result.AsUi8x16 = vqtbl1q_u8(op1.AsUi8x16, transform);  +    return result;  +}  +   Y_FORCE_INLINE int -_mm_movemask_epi8(const __m128i& op) { -    uint8x16_t mask = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, -                       0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}; -    uint8x16_t opmasked = vandq_u8(op.AsUi8x16, mask); -    int8x16_t byteshifter = { -        0, -7, 0, -7, 0, -7, 0, -7, 0, -7, 0, -7, 0, -7, 0, -7}; -    uint8x16_t opshifted = vshlq_u8(opmasked, byteshifter); -    int16x8_t wordshifter = {-7, -5, -3, -1, 1, 3, 5, 7}; -    uint16x8_t wordshifted = -        vshlq_u16(vreinterpretq_u16_u8(opshifted), wordshifter); -    return vaddvq_u16(wordshifted); -} - -template <int imm> -struct THelper_mm_srli_si128 : TBaseWrapper<__m128i> { +_mm_movemask_epi8(const __m128i& op) {  +    uint8x16_t mask = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,  +                       0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80};  +    uint8x16_t opmasked = vandq_u8(op.AsUi8x16, mask);  +    int8x16_t byteshifter = {  +        0, -7, 0, -7, 0, -7, 0, -7, 0, -7, 0, -7, 0, -7, 0, -7};  +    uint8x16_t opshifted = vshlq_u8(opmasked, byteshifter);  +    int16x8_t wordshifter = {-7, -5, -3, -1, 1, 3, 5, 7};  +    uint16x8_t wordshifted =  +        vshlq_u16(vreinterpretq_u16_u8(opshifted), wordshifter);  +    return vaddvq_u16(wordshifted);  +}  +  +template <int imm>  +struct THelper_mm_srli_si128 : TBaseWrapper<__m128i> {       Y_FORCE_INLINE      THelper_mm_srli_si128(const 
__m128i a) {          const auto zero = vdupq_n_u8(0); -        TQType<uint8x16_t>::As(Value) = vextq_u8(a.AsUi8x16, zero, imm); -    } -}; - +        TQType<uint8x16_t>::As(Value) = vextq_u8(a.AsUi8x16, zero, imm);  +    }  +};  +   template <>  struct THelper_mm_srli_si128<16> : TBaseWrapper<__m128i> {      Y_FORCE_INLINE @@ -518,8 +518,8 @@ struct THelper_mm_srli_si128<16> : TBaseWrapper<__m128i> {      }  }; -#define _mm_srli_si128(a, imm) THelper_mm_srli_si128<imm>(a) - +#define _mm_srli_si128(a, imm) THelper_mm_srli_si128<imm>(a)  +   template<int imm>  inline uint8x16_t vextq_u8_function(uint8x16_t a, uint8x16_t b) {      return vextq_u8(a, b, imm); @@ -531,33 +531,33 @@ inline uint8x16_t vextq_u8_function<16>(uint8x16_t /* a */, uint8x16_t b) {  } -template <int imm> -struct THelper_mm_slli_si128 : TBaseWrapper<__m128i> { +template <int imm>  +struct THelper_mm_slli_si128 : TBaseWrapper<__m128i> {       Y_FORCE_INLINE      THelper_mm_slli_si128(const __m128i a) { -        auto zero = vdupq_n_u8(0); +        auto zero = vdupq_n_u8(0);           TQType<uint8x16_t>::As(Value) = vextq_u8_function<16 - imm>(zero, a.AsUi8x16); -    } -}; - -#define _mm_slli_si128(a, imm) THelper_mm_slli_si128<imm>(a) - +    }  +};  +  +#define _mm_slli_si128(a, imm) THelper_mm_slli_si128<imm>(a)  +   Y_FORCE_INLINE int _mm_cvtsi128_si32(const __m128i& op) { -    return vgetq_lane_s32(op.AsSi32x4, 0); -} - -struct _mm_set_epi16 : TBaseWrapper<__m128i> { +    return vgetq_lane_s32(op.AsSi32x4, 0);  +}  +  +struct _mm_set_epi16 : TBaseWrapper<__m128i> {       Y_FORCE_INLINE -    _mm_set_epi16(const short w7, const short w6, -                  const short w5, const short w4, -                  const short w3, const short w2, -                  const short w1, const short w0) { -        int16x4_t d0 = {w0, w1, w2, w3}; -        int16x4_t d1 = {w4, w5, w6, w7}; -        TQType<int16x8_t>::As(Value) = vcombine_s16(d0, d1); -    } -}; - +    _mm_set_epi16(const short w7, const short w6,  
+                  const short w5, const short w4,  +                  const short w3, const short w2,  +                  const short w1, const short w0) {  +        int16x4_t d0 = {w0, w1, w2, w3};  +        int16x4_t d1 = {w4, w5, w6, w7};  +        TQType<int16x8_t>::As(Value) = vcombine_s16(d0, d1);  +    }  +};  +   struct _mm_setr_epi16 : TBaseWrapper<__m128i> {      Y_FORCE_INLINE      _mm_setr_epi16(const short w7, const short w6, @@ -570,16 +570,16 @@ struct _mm_setr_epi16 : TBaseWrapper<__m128i> {      }  }; -struct _mm_set_epi32 : TBaseWrapper<__m128i> { +struct _mm_set_epi32 : TBaseWrapper<__m128i> {       Y_FORCE_INLINE -    _mm_set_epi32(const int x3, const int x2, -                  const int x1, const int x0) { -        int32x2_t d0 = {x0, x1}; -        int32x2_t d1 = {x2, x3}; -        TQType<int32x4_t>::As(Value) = vcombine_s32(d0, d1); -    } -}; - +    _mm_set_epi32(const int x3, const int x2,  +                  const int x1, const int x0) {  +        int32x2_t d0 = {x0, x1};  +        int32x2_t d1 = {x2, x3};  +        TQType<int32x4_t>::As(Value) = vcombine_s32(d0, d1);  +    }  +};  +   struct _mm_setr_epi32 : TBaseWrapper<__m128i> {      Y_FORCE_INLINE      _mm_setr_epi32(const int x3, const int x2, @@ -590,14 +590,14 @@ struct _mm_setr_epi32 : TBaseWrapper<__m128i> {      }  }; -struct _mm_cvtsi32_si128 : TBaseWrapper<__m128i> { +struct _mm_cvtsi32_si128 : TBaseWrapper<__m128i> {       Y_FORCE_INLINE -    _mm_cvtsi32_si128(int op) { -        auto zero = vdupq_n_s32(0); -        TQType<int32x4_t>::As(Value) = vsetq_lane_s32(op, zero, 0); -    } -}; - +    _mm_cvtsi32_si128(int op) {  +        auto zero = vdupq_n_s32(0);  +        TQType<int32x4_t>::As(Value) = vsetq_lane_s32(op, zero, 0);  +    }  +};  +   struct _mm_cvtsi64_si128 : TBaseWrapper<__m128i> {      Y_FORCE_INLINE      _mm_cvtsi64_si128(i64 op) { @@ -606,41 +606,41 @@ struct _mm_cvtsi64_si128 : TBaseWrapper<__m128i> {      }  }; -template <typename TOpOut, typename TOpIn, -     
     typename TFunc, TFunc* func, -          typename TCombine, TCombine* combine> -struct TCombineWrapper : TBaseWrapper<__m128i> { +template <typename TOpOut, typename TOpIn,  +          typename TFunc, TFunc* func,  +          typename TCombine, TCombine* combine>  +struct TCombineWrapper : TBaseWrapper<__m128i> {       Y_FORCE_INLINE -    TCombineWrapper(const __m128i op1, const __m128i op2) { -        TQType<TOpOut>::As(Value) = -            combine(func(TQType<TOpIn>::As(op1)), -                    func(TQType<TOpIn>::As(op2))); -    } -}; - -using _mm_packs_epi16 = -    TCombineWrapper<int8x16_t, int16x8_t, -                    decltype(vqmovn_s16), vqmovn_s16, -                    decltype(vcombine_s8), vcombine_s8>; -using _mm_packs_epi32 = -    TCombineWrapper<int16x8_t, int32x4_t, -                    decltype(vqmovn_s32), vqmovn_s32, -                    decltype(vcombine_s16), vcombine_s16>; -using _mm_packus_epi16 = -    TCombineWrapper<uint8x16_t, int16x8_t, -                    decltype(vqmovun_s16), vqmovun_s16, -                    decltype(vcombine_u8), vcombine_u8>; - -template <typename TOpOut, typename TOpIn, -          typename TFunc, TFunc* func, typename... 
TParams> -struct TScalarOutWrapper : TBaseWrapper<TOpOut> { +    TCombineWrapper(const __m128i op1, const __m128i op2) {  +        TQType<TOpOut>::As(Value) =  +            combine(func(TQType<TOpIn>::As(op1)),  +                    func(TQType<TOpIn>::As(op2)));  +    }  +};  +  +using _mm_packs_epi16 =  +    TCombineWrapper<int8x16_t, int16x8_t,  +                    decltype(vqmovn_s16), vqmovn_s16,  +                    decltype(vcombine_s8), vcombine_s8>;  +using _mm_packs_epi32 =  +    TCombineWrapper<int16x8_t, int32x4_t,  +                    decltype(vqmovn_s32), vqmovn_s32,  +                    decltype(vcombine_s16), vcombine_s16>;  +using _mm_packus_epi16 =  +    TCombineWrapper<uint8x16_t, int16x8_t,  +                    decltype(vqmovun_s16), vqmovun_s16,  +                    decltype(vcombine_u8), vcombine_u8>;  +  +template <typename TOpOut, typename TOpIn,  +          typename TFunc, TFunc* func, typename... TParams>  +struct TScalarOutWrapper : TBaseWrapper<TOpOut> {       Y_FORCE_INLINE -    TScalarOutWrapper(const __m128i op, TParams... params) { -        TBaseWrapper<TOpOut>::Value = -            func(TQType<TOpIn>::As(op), params...); -    } -}; - +    TScalarOutWrapper(const __m128i op, TParams... 
params) {  +        TBaseWrapper<TOpOut>::Value =  +            func(TQType<TOpIn>::As(op), params...);  +    }  +};  +   template<int imm>  int extract_epi8_arm(__m128i arg) {      return vgetq_lane_u8(arg.AsUi8x16, imm); @@ -649,13 +649,13 @@ int extract_epi8_arm(__m128i arg) {  template<int imm>  int extract_epi16_arm(__m128i arg) {      return vgetq_lane_u16(arg.AsUi16x8, imm); -} - +}  +   template<int imm>  int extract_epi32_arm(__m128i arg) {      return vgetq_lane_s32(arg.AsSi32x4, imm);  } - +   template<int imm>  long long extract_epi64_arm(__m128i arg) {      return vgetq_lane_s64(arg.AsSi64x2, imm); @@ -669,49 +669,49 @@ long long extract_epi64_arm(__m128i arg) {  static Y_FORCE_INLINE  __m128i _mm_mul_epu32(__m128i op1, __m128i op2) { -    __m128i result; -    uint32x4_t r1 = vuzp1q_u32(op1.AsUi32x4, op2.AsUi32x4); -    uint32x4_t r2 = vuzp1q_u32(op2.AsUi32x4, op1.AsUi32x4); -    result.AsUi64x2 = vmull_u32(vget_low_u32(r1), vget_low_u32(r2)); -    return result; -} - -template <> -struct TQType<float32x4_t> { -    static inline float32x4_t& As(__m128& value) { -        return value.AsFloat32x4; -    } - -    static inline const float32x4_t& As(const __m128& value) { -        return value.AsFloat32x4; -    } - -    static inline float32x4_t& As(__m128i& value) { -        return value.AsFloat32x4; -    } - -    static inline const float32x4_t& As(const __m128i& value) { -        return value.AsFloat32x4; -    } -}; - -template <> -struct TQType<float64x2_t> { -    static inline float64x2_t& As(__m128& value) { -        return value.AsFloat64x2; -    } - -    static inline const float64x2_t& As(const __m128& value) { -        return value.AsFloat64x2; -    } - -    static inline float64x2_t& As(__m128i& value) { -        return value.AsFloat64x2; -    } - -    static inline const float64x2_t& As(const __m128i& value) { -        return value.AsFloat64x2; -    } +    __m128i result;  +    uint32x4_t r1 = vuzp1q_u32(op1.AsUi32x4, op2.AsUi32x4);  +    
uint32x4_t r2 = vuzp1q_u32(op2.AsUi32x4, op1.AsUi32x4);  +    result.AsUi64x2 = vmull_u32(vget_low_u32(r1), vget_low_u32(r2));  +    return result;  +}  +  +template <>  +struct TQType<float32x4_t> {  +    static inline float32x4_t& As(__m128& value) {  +        return value.AsFloat32x4;  +    }  +  +    static inline const float32x4_t& As(const __m128& value) {  +        return value.AsFloat32x4;  +    }  +  +    static inline float32x4_t& As(__m128i& value) {  +        return value.AsFloat32x4;  +    }  +  +    static inline const float32x4_t& As(const __m128i& value) {  +        return value.AsFloat32x4;  +    }  +};  +  +template <>  +struct TQType<float64x2_t> {  +    static inline float64x2_t& As(__m128& value) {  +        return value.AsFloat64x2;  +    }  +  +    static inline const float64x2_t& As(const __m128& value) {  +        return value.AsFloat64x2;  +    }  +  +    static inline float64x2_t& As(__m128i& value) {  +        return value.AsFloat64x2;  +    }  +  +    static inline const float64x2_t& As(const __m128i& value) {  +        return value.AsFloat64x2;  +    }       static inline float64x2_t& As(__m128d& value) {          return value; @@ -720,30 +720,30 @@ struct TQType<float64x2_t> {      static inline const float64x2_t& As(const __m128d& value) {          return value;      } -}; - -using _mm_set1_ps = TWrapperSimpleF<float32x4_t, -                                    decltype(vdupq_n_f32), vdupq_n_f32, const float>; -using _mm_set_ps1 = TWrapperSimpleF<float32x4_t, -                                    decltype(vdupq_n_f32), vdupq_n_f32, const float>; - -struct _mm_setzero_ps : TBaseWrapper<__m128> { +};  +  +using _mm_set1_ps = TWrapperSimpleF<float32x4_t,  +                                    decltype(vdupq_n_f32), vdupq_n_f32, const float>;  +using _mm_set_ps1 = TWrapperSimpleF<float32x4_t,  +                                    decltype(vdupq_n_f32), vdupq_n_f32, const float>;  +  +struct _mm_setzero_ps : TBaseWrapper<__m128> {       
Y_FORCE_INLINE -    _mm_setzero_ps() { -        TQType<float32x4_t>::As(Value) = vdupq_n_f32(0.); -    } -}; - +    _mm_setzero_ps() {  +        TQType<float32x4_t>::As(Value) = vdupq_n_f32(0.);  +    }  +};  +   Y_FORCE_INLINE __m128d _mm_setzero_pd() {      return vdupq_n_f64(0.);  }  Y_FORCE_INLINE __m128 _mm_loadu_ps(const float* ptr) { -    __m128 result; -    result.AsFloat32x4 = vld1q_f32(ptr); -    return result; -} - +    __m128 result;  +    result.AsFloat32x4 = vld1q_f32(ptr);  +    return result;  +}  +   Y_FORCE_INLINE __m128 _mm_load_ps(const float* ptr) {      __m128 result;      result.AsFloat32x4 = vld1q_f32(ptr); @@ -751,23 +751,23 @@ Y_FORCE_INLINE __m128 _mm_load_ps(const float* ptr) {  }  Y_FORCE_INLINE void _mm_storeu_ps(float* ptr, const __m128& op) { -    vst1q_f32(ptr, op.AsFloat32x4); -} - +    vst1q_f32(ptr, op.AsFloat32x4);  +}  +   Y_FORCE_INLINE void _mm_store_ps(float* ptr, const __m128& op) {      vst1q_f32(ptr, op.AsFloat32x4);  } -struct _mm_set_ps : TBaseWrapper<__m128> { +struct _mm_set_ps : TBaseWrapper<__m128> {       Y_FORCE_INLINE -    _mm_set_ps(const float x3, const float x2, -               const float x1, const float x0) { -        float32x2_t d0 = {x0, x1}; -        float32x2_t d1 = {x2, x3}; -        TQType<float32x4_t>::As(Value) = vcombine_f32(d0, d1); -    } -}; - +    _mm_set_ps(const float x3, const float x2,  +               const float x1, const float x0) {  +        float32x2_t d0 = {x0, x1};  +        float32x2_t d1 = {x2, x3};  +        TQType<float32x4_t>::As(Value) = vcombine_f32(d0, d1);  +    }  +};  +   Y_FORCE_INLINE __m128d _mm_set_pd(double d1, double d0) {      const float64x1_t p0 = {d0};      const float64x1_t p1 = {d1}; @@ -788,81 +788,81 @@ Y_FORCE_INLINE void _mm_store_pd(double* res, __m128d a) {      vst1q_f64(res, a);  } -using _mm_add_ps = TWrapperDualF<float32x4_t, decltype(vaddq_f32), vaddq_f32>; -using _mm_sub_ps = TWrapperDualF<float32x4_t, decltype(vsubq_f32), vsubq_f32>; -using 
_mm_mul_ps = TWrapperDualF<float32x4_t, decltype(vmulq_f32), vmulq_f32>; -using _mm_div_ps = TWrapperDualF<float32x4_t, decltype(vdivq_f32), vdivq_f32>; -using _mm_cmpeq_ps = TWrapperDualF<float32x4_t, decltype(vceqq_f32), vceqq_f32>; -using _mm_cmpgt_ps = TWrapperDualF<float32x4_t, decltype(vcgtq_f32), vcgtq_f32>; -using _mm_max_ps = TWrapperDualF<float32x4_t, decltype(vmaxq_f32), vmaxq_f32>; -using _mm_min_ps = TWrapperDualF<float32x4_t, decltype(vminq_f32), vminq_f32>; - +using _mm_add_ps = TWrapperDualF<float32x4_t, decltype(vaddq_f32), vaddq_f32>;  +using _mm_sub_ps = TWrapperDualF<float32x4_t, decltype(vsubq_f32), vsubq_f32>;  +using _mm_mul_ps = TWrapperDualF<float32x4_t, decltype(vmulq_f32), vmulq_f32>;  +using _mm_div_ps = TWrapperDualF<float32x4_t, decltype(vdivq_f32), vdivq_f32>;  +using _mm_cmpeq_ps = TWrapperDualF<float32x4_t, decltype(vceqq_f32), vceqq_f32>;  +using _mm_cmpgt_ps = TWrapperDualF<float32x4_t, decltype(vcgtq_f32), vcgtq_f32>;  +using _mm_max_ps = TWrapperDualF<float32x4_t, decltype(vmaxq_f32), vmaxq_f32>;  +using _mm_min_ps = TWrapperDualF<float32x4_t, decltype(vminq_f32), vminq_f32>;  +   using _mm_add_pd = TWrapperDualF<float64x2_t, decltype(vaddq_f64), vaddq_f64, __m128d>;  using _mm_sub_pd = TWrapperDualF<float64x2_t, decltype(vsubq_f64), vsubq_f64, __m128d>;  using _mm_mul_pd = TWrapperDualF<float64x2_t, decltype(vmulq_f64), vmulq_f64, __m128d>;  using _mm_div_pd = TWrapperDualF<float64x2_t, decltype(vdivq_f64), vdivq_f64, __m128d>; -struct _mm_and_ps : TBaseWrapper<__m128> { +struct _mm_and_ps : TBaseWrapper<__m128> {       Y_FORCE_INLINE -    _mm_and_ps(const __m128& op1, const __m128& op2) { -        TQType<uint64x2_t>::As(Value) = -            vandq_u64(TQType<uint64x2_t>::As(op1), -                      TQType<uint64x2_t>::As(op2)); -    } -}; - +    _mm_and_ps(const __m128& op1, const __m128& op2) {  +        TQType<uint64x2_t>::As(Value) =  +            vandq_u64(TQType<uint64x2_t>::As(op1),  +                      
TQType<uint64x2_t>::As(op2));  +    }  +};  +   Y_FORCE_INLINE __m128d _mm_and_pd(__m128d a, __m128d b) {      return vandq_u64(a, b);  }  Y_FORCE_INLINE void _MM_TRANSPOSE4_PS(__m128& op0, __m128& op1, __m128& op2, __m128& op3) { -    float64x2_t im0 = -        (float64x2_t)vtrn1q_f32(op0.AsFloat32x4, op1.AsFloat32x4); -    float64x2_t im1 = -        (float64x2_t)vtrn2q_f32(op0.AsFloat32x4, op1.AsFloat32x4); -    float64x2_t im2 = -        (float64x2_t)vtrn1q_f32(op2.AsFloat32x4, op3.AsFloat32x4); -    float64x2_t im3 = -        (float64x2_t)vtrn2q_f32(op2.AsFloat32x4, op3.AsFloat32x4); - -    TQType<float64x2_t>::As(op0) = vtrn1q_f64(im0, im2); -    TQType<float64x2_t>::As(op1) = vtrn1q_f64(im1, im3); -    TQType<float64x2_t>::As(op2) = vtrn2q_f64(im0, im2); -    TQType<float64x2_t>::As(op3) = vtrn2q_f64(im1, im3); -}; - +    float64x2_t im0 =  +        (float64x2_t)vtrn1q_f32(op0.AsFloat32x4, op1.AsFloat32x4);  +    float64x2_t im1 =  +        (float64x2_t)vtrn2q_f32(op0.AsFloat32x4, op1.AsFloat32x4);  +    float64x2_t im2 =  +        (float64x2_t)vtrn1q_f32(op2.AsFloat32x4, op3.AsFloat32x4);  +    float64x2_t im3 =  +        (float64x2_t)vtrn2q_f32(op2.AsFloat32x4, op3.AsFloat32x4);  +  +    TQType<float64x2_t>::As(op0) = vtrn1q_f64(im0, im2);  +    TQType<float64x2_t>::As(op1) = vtrn1q_f64(im1, im3);  +    TQType<float64x2_t>::As(op2) = vtrn2q_f64(im0, im2);  +    TQType<float64x2_t>::As(op3) = vtrn2q_f64(im1, im3);  +};  +   Y_FORCE_INLINE __m128 _mm_castsi128_ps(__m128i op) { -    return reinterpret_cast<__m128&>(op); -} - +    return reinterpret_cast<__m128&>(op);  +}  +   Y_FORCE_INLINE __m128i _mm_castps_si128(__m128 op) { -    return reinterpret_cast<__m128i&>(op); -} - -template <typename TOpOut, typename TOpIn, -          typename TFunc, TFunc* func, typename... 
TParams> -struct TCvtS2FWrapperSingle : TBaseWrapper<__m128> { +    return reinterpret_cast<__m128i&>(op);  +}  +  +template <typename TOpOut, typename TOpIn,  +          typename TFunc, TFunc* func, typename... TParams>  +struct TCvtS2FWrapperSingle : TBaseWrapper<__m128> {       Y_FORCE_INLINE -    TCvtS2FWrapperSingle(const __m128i& op, TParams... params) { -        TQType<TOpOut>::As(Value) = -            func(TQType<TOpIn>::As(op), params...); -    } -}; - -using _mm_cvtepi32_ps = -    TCvtS2FWrapperSingle<float32x4_t, int32x4_t, -                         decltype(vcvtq_f32_s32), vcvtq_f32_s32>; - -template <typename TOpOut, typename TOpIn, -          typename TFunc, TFunc* func, typename... TParams> -struct TCvtF2SWrapperSingle : TBaseWrapper<__m128i> { +    TCvtS2FWrapperSingle(const __m128i& op, TParams... params) {  +        TQType<TOpOut>::As(Value) =  +            func(TQType<TOpIn>::As(op), params...);  +    }  +};  +  +using _mm_cvtepi32_ps =  +    TCvtS2FWrapperSingle<float32x4_t, int32x4_t,  +                         decltype(vcvtq_f32_s32), vcvtq_f32_s32>;  +  +template <typename TOpOut, typename TOpIn,  +          typename TFunc, TFunc* func, typename... TParams>  +struct TCvtF2SWrapperSingle : TBaseWrapper<__m128i> {       Y_FORCE_INLINE -    TCvtF2SWrapperSingle(const __m128& op, TParams... params) { -        TQType<TOpOut>::As(Value) = -            func(TQType<TOpIn>::As(op), params...); -    } -}; - +    TCvtF2SWrapperSingle(const __m128& op, TParams... params) {  +        TQType<TOpOut>::As(Value) =  +            func(TQType<TOpIn>::As(op), params...);  +    }  +};  +   inline __m128i _mm_cvtps_epi32(__m128 a) {      /// vcvtq_s32_f32 rounds to zero, but we need to round to the nearest.      
static const float32x4_t half = vdupq_n_f32(0.5f); @@ -874,26 +874,26 @@ inline __m128i _mm_cvtps_epi32(__m128 a) {      return res;  } -using _mm_cvttps_epi32 = -    TCvtF2SWrapperSingle<int32x4_t, float32x4_t, -                         decltype(vcvtq_s32_f32), vcvtq_s32_f32>; - +using _mm_cvttps_epi32 =  +    TCvtF2SWrapperSingle<int32x4_t, float32x4_t,  +                         decltype(vcvtq_s32_f32), vcvtq_s32_f32>;  +   Y_FORCE_INLINE int -_mm_movemask_ps(const __m128& op) { -    uint32x4_t mask = {0x80000000, 0x80000000, 0x80000000, 0x80000000}; -    uint32x4_t bits = vandq_u32(op.AsUi32x4, mask); -    int32x4_t shifts = {-31, -30, -29, -28}; -    bits = vshlq_u32(bits, shifts); -    return vaddvq_u32(bits); -} +_mm_movemask_ps(const __m128& op) {  +    uint32x4_t mask = {0x80000000, 0x80000000, 0x80000000, 0x80000000};  +    uint32x4_t bits = vandq_u32(op.AsUi32x4, mask);  +    int32x4_t shifts = {-31, -30, -29, -28};  +    bits = vshlq_u32(bits, shifts);  +    return vaddvq_u32(bits);  +}   Y_FORCE_INLINE i64 _mm_cvtsi128_si64(__m128i a) {      return vgetq_lane_s64(a.AsSi64x2, 0);  } - -static inline void _mm_pause() { +  +static inline void _mm_pause() {       __asm__ ("YIELD"); -} +}   static inline __m128 _mm_rsqrt_ps(__m128 a) {      __m128 res; diff --git a/library/cpp/sse/ut/test.cpp b/library/cpp/sse/ut/test.cpp index 33c999d284b..42a82a8cfa6 100644 --- a/library/cpp/sse/ut/test.cpp +++ b/library/cpp/sse/ut/test.cpp @@ -1,10 +1,10 @@ -/* -  Unittests for all SSE instrinsics translated to NEON instrinsics or -  software implementation. -  Should be tested both on Intel and ARM64. - */ -/* Author: Vitaliy Manushkin <[email protected] */ - +/*  +  Unittests for all SSE instrinsics translated to NEON instrinsics or  +  software implementation.  +  Should be tested both on Intel and ARM64.  
+ */  +/* Author: Vitaliy Manushkin <[email protected] */  +   #include <library/cpp/testing/unittest/registar.h>  #include <util/generic/typetraits.h> @@ -13,35 +13,35 @@  #include <util/stream/output.h>  #include <algorithm> -#include <array> -#include <limits> +#include <array>  +#include <limits>   #include <memory>  #include <type_traits>  #include <utility> - -template <typename TResult, typename TFunc, TFunc* func> -struct T_mm_CallWrapper { -    TResult Value; - -    template <typename... TParams> -    T_mm_CallWrapper(TParams&&... params) { -        Value = func(std::forward<TParams>(params)...); -    } - -    operator TResult&() { -        return Value; -    } - -    operator const TResult&() const { -        return Value; -    } -}; - -#if defined(_arm64_) +  +template <typename TResult, typename TFunc, TFunc* func>  +struct T_mm_CallWrapper {  +    TResult Value;  +  +    template <typename... TParams>  +    T_mm_CallWrapper(TParams&&... params) {  +        Value = func(std::forward<TParams>(params)...);  +    }  +  +    operator TResult&() {  +        return Value;  +    }  +  +    operator const TResult&() const {  +        return Value;  +    }  +};  +  +#if defined(_arm64_)   #include "library/cpp/sse/sse2neon.h"  #elif defined(_i386_) || defined(_x86_64_) -#include <xmmintrin.h> -#include <emmintrin.h> +#include <xmmintrin.h>  +#include <emmintrin.h>   #include <smmintrin.h>  #elif defined(_ppc64_)  #include "library/cpp/sse/powerpc.h" @@ -54,10 +54,10 @@ struct T_mm_CallWrapper {  #define WrapF(T_mm_func) T_mm_func  #define WrapD(T_mm_func) T_mm_func  #elif defined(_ppc64_) || defined(_i386_) || defined(_x86_64_) -#define Wrap(_mm_func) \ -    T_mm_CallWrapper<__m128i, decltype(_mm_func), _mm_func> -#define WrapF(_mm_func) \ -    T_mm_CallWrapper<__m128, decltype(_mm_func), _mm_func> +#define Wrap(_mm_func) \  +    T_mm_CallWrapper<__m128i, decltype(_mm_func), _mm_func>  +#define WrapF(_mm_func) \  +    T_mm_CallWrapper<__m128, decltype(_mm_func), 
_mm_func>   #define WrapD(_mm_func) \      T_mm_CallWrapper<__m128d, decltype(_mm_func), _mm_func>  using int8x16_t = std::array<i8, 16>; @@ -70,69 +70,69 @@ using uint32x4_t = std::array<ui32, 4>;  using uint64x2_t = std::array<ui64, 2>;  using float32x4_t = std::array<float, 4>;  using float64x2_t = std::array<double, 2>; - +   template <typename TVectorType> -struct TQType { +struct TQType {       static TVectorType As(__m128i param) {          TVectorType value; -        _mm_storeu_si128((__m128i*)&value, param); -        return value; -    } +        _mm_storeu_si128((__m128i*)&value, param);  +        return value;  +    }       static TVectorType As(__m128 param) {          TVectorType value; -        _mm_storeu_ps((float*)&value, param); -        return value; -    } +        _mm_storeu_ps((float*)&value, param);  +        return value;  +    }       static TVectorType As(__m128d param) {          TVectorType value;          _mm_storeu_pd((double*)&value, param);          return value;      } -}; -#endif - +};  +#endif  +   template <typename TVectorType> -struct TFuncLoad; +struct TFuncLoad;   template <typename TVectorType> -struct TFuncStore; - -template <> -struct TFuncLoad<__m128i> { -    __m128i Value; - -    template <typename TPointer> -    TFuncLoad(TPointer* ptr) { -        Value = _mm_loadu_si128((__m128i*)ptr); -    } - -    operator __m128i&() { -        return Value; -    } - -    operator const __m128i&() const { -        return Value; -    } -}; - -template <> -struct TFuncLoad<__m128> { -    __m128 Value; - -    template <typename TPointer> -    TFuncLoad(TPointer* ptr) { -        Value = _mm_loadu_ps((float*)ptr); -    } - -    operator __m128&() { -        return Value; -    } - -    operator const __m128&() const { -        return Value; -    } -}; - -template <> +struct TFuncStore;  +  +template <>  +struct TFuncLoad<__m128i> {  +    __m128i Value;  +  +    template <typename TPointer>  +    TFuncLoad(TPointer* ptr) {  +        Value = 
_mm_loadu_si128((__m128i*)ptr);  +    }  +  +    operator __m128i&() {  +        return Value;  +    }  +  +    operator const __m128i&() const {  +        return Value;  +    }  +};  +  +template <>  +struct TFuncLoad<__m128> {  +    __m128 Value;  +  +    template <typename TPointer>  +    TFuncLoad(TPointer* ptr) {  +        Value = _mm_loadu_ps((float*)ptr);  +    }  +  +    operator __m128&() {  +        return Value;  +    }  +  +    operator const __m128&() const {  +        return Value;  +    }  +};  +  +template <>   struct TFuncLoad<__m128d> {      __m128d Value; @@ -151,153 +151,153 @@ struct TFuncLoad<__m128d> {  };  template <> -struct TFuncStore<__m128i> { -    template <typename TPointer> -    TFuncStore(TPointer* ptr, __m128i Value) { -        _mm_storeu_si128((__m128i*)ptr, Value); -    } -}; - -template <> -struct TFuncStore<__m128> { -    template <typename TPointer> -    TFuncStore(TPointer* ptr, __m128 Value) { -        _mm_storeu_ps((float*)ptr, Value); -    } -}; - -class TSSEEmulTest: public TTestBase { -private: -    UNIT_TEST_SUITE(TSSEEmulTest); -    UNIT_TEST(Test_mm_load_si128); -    UNIT_TEST(Test_mm_loadu_si128); +struct TFuncStore<__m128i> {  +    template <typename TPointer>  +    TFuncStore(TPointer* ptr, __m128i Value) {  +        _mm_storeu_si128((__m128i*)ptr, Value);  +    }  +};  +  +template <>  +struct TFuncStore<__m128> {  +    template <typename TPointer>  +    TFuncStore(TPointer* ptr, __m128 Value) {  +        _mm_storeu_ps((float*)ptr, Value);  +    }  +};  +  +class TSSEEmulTest: public TTestBase {  +private:  +    UNIT_TEST_SUITE(TSSEEmulTest);  +    UNIT_TEST(Test_mm_load_si128);  +    UNIT_TEST(Test_mm_loadu_si128);       UNIT_TEST(Test_mm_storeu_si128);      UNIT_TEST(Test_mm_loadu_si128_2);      UNIT_TEST(Test_mm_loadu_ps);      UNIT_TEST(Test_mm_storeu_ps); - +       UNIT_TEST(Test_mm_slli_epi16);      UNIT_TEST(Test_mm_slli_epi32);      UNIT_TEST(Test_mm_slli_epi64);      UNIT_TEST(Test_mm_slli_si128); -    
UNIT_TEST(Test_mm_srli_epi16); -    UNIT_TEST(Test_mm_srli_epi32); -    UNIT_TEST(Test_mm_srli_epi64); +    UNIT_TEST(Test_mm_srli_epi16);  +    UNIT_TEST(Test_mm_srli_epi32);  +    UNIT_TEST(Test_mm_srli_epi64);       UNIT_TEST(Test_mm_srli_si128); - +       UNIT_TEST(Test_mm_srai_epi16);      UNIT_TEST(Test_mm_srai_epi32);      UNIT_TEST(Test_mm_sll_epi16);      UNIT_TEST(Test_mm_sll_epi32);      UNIT_TEST(Test_mm_sll_epi64); - +       UNIT_TEST(Test_mm_srl_epi16);      UNIT_TEST(Test_mm_srl_epi32);      UNIT_TEST(Test_mm_srl_epi64); -    UNIT_TEST(Test_mm_add_epi16); -    UNIT_TEST(Test_mm_add_epi32); -    UNIT_TEST(Test_mm_add_epi64); -    UNIT_TEST(Test_mm_add_ps); +    UNIT_TEST(Test_mm_add_epi16);  +    UNIT_TEST(Test_mm_add_epi32);  +    UNIT_TEST(Test_mm_add_epi64);  +    UNIT_TEST(Test_mm_add_ps);       UNIT_TEST(Test_mm_add_pd); - +       UNIT_TEST(Test_mm_madd_epi16); -    UNIT_TEST(Test_mm_sub_epi16); -    UNIT_TEST(Test_mm_sub_epi32); -    UNIT_TEST(Test_mm_sub_epi64); -    UNIT_TEST(Test_mm_sub_ps); +    UNIT_TEST(Test_mm_sub_epi16);  +    UNIT_TEST(Test_mm_sub_epi32);  +    UNIT_TEST(Test_mm_sub_epi64);  +    UNIT_TEST(Test_mm_sub_ps);       UNIT_TEST(Test_mm_sub_pd); - -    UNIT_TEST(Test_mm_mul_ps); +  +    UNIT_TEST(Test_mm_mul_ps);       UNIT_TEST(Test_mm_mul_pd); -    UNIT_TEST(Test_mm_div_ps); +    UNIT_TEST(Test_mm_div_ps);       UNIT_TEST(Test_mm_div_pd); -    UNIT_TEST(Test_mm_max_ps); -    UNIT_TEST(Test_mm_min_ps); -    UNIT_TEST(Test_mm_and_ps); - -    UNIT_TEST(Test_mm_unpacklo_epi8); -    UNIT_TEST(Test_mm_unpackhi_epi8); -    UNIT_TEST(Test_mm_unpacklo_epi16); -    UNIT_TEST(Test_mm_unpackhi_epi16); -    UNIT_TEST(Test_mm_unpacklo_epi32); -    UNIT_TEST(Test_mm_unpackhi_epi32); -    UNIT_TEST(Test_mm_unpacklo_epi64); -    UNIT_TEST(Test_mm_unpackhi_epi64); - -    UNIT_TEST(Test_mm_or_si128); -    UNIT_TEST(Test_mm_and_si128); -    UNIT_TEST(Test_mm_andnot_si128); - -    UNIT_TEST(Test_mm_cmpeq_epi8); -    
UNIT_TEST(Test_mm_cmpeq_epi16); -    UNIT_TEST(Test_mm_cmpeq_epi32); -    UNIT_TEST(Test_mm_cmpeq_ps); - -    UNIT_TEST(Test_mm_cmpgt_epi8); -    UNIT_TEST(Test_mm_cmpgt_epi16); -    UNIT_TEST(Test_mm_cmpgt_epi32); -    UNIT_TEST(Test_mm_cmpgt_ps); - -    UNIT_TEST(Test_mm_cmplt_epi8); -    UNIT_TEST(Test_mm_cmplt_epi16); -    UNIT_TEST(Test_mm_cmplt_epi32); - -    UNIT_TEST(Test_mm_set1_epi8); -    UNIT_TEST(Test_mm_set1_epi16); -    UNIT_TEST(Test_mm_set1_epi32); -    UNIT_TEST(Test_mm_set1_ps); +    UNIT_TEST(Test_mm_max_ps);  +    UNIT_TEST(Test_mm_min_ps);  +    UNIT_TEST(Test_mm_and_ps);  +  +    UNIT_TEST(Test_mm_unpacklo_epi8);  +    UNIT_TEST(Test_mm_unpackhi_epi8);  +    UNIT_TEST(Test_mm_unpacklo_epi16);  +    UNIT_TEST(Test_mm_unpackhi_epi16);  +    UNIT_TEST(Test_mm_unpacklo_epi32);  +    UNIT_TEST(Test_mm_unpackhi_epi32);  +    UNIT_TEST(Test_mm_unpacklo_epi64);  +    UNIT_TEST(Test_mm_unpackhi_epi64);  +  +    UNIT_TEST(Test_mm_or_si128);  +    UNIT_TEST(Test_mm_and_si128);  +    UNIT_TEST(Test_mm_andnot_si128);  +  +    UNIT_TEST(Test_mm_cmpeq_epi8);  +    UNIT_TEST(Test_mm_cmpeq_epi16);  +    UNIT_TEST(Test_mm_cmpeq_epi32);  +    UNIT_TEST(Test_mm_cmpeq_ps);  +  +    UNIT_TEST(Test_mm_cmpgt_epi8);  +    UNIT_TEST(Test_mm_cmpgt_epi16);  +    UNIT_TEST(Test_mm_cmpgt_epi32);  +    UNIT_TEST(Test_mm_cmpgt_ps);  +  +    UNIT_TEST(Test_mm_cmplt_epi8);  +    UNIT_TEST(Test_mm_cmplt_epi16);  +    UNIT_TEST(Test_mm_cmplt_epi32);  +  +    UNIT_TEST(Test_mm_set1_epi8);  +    UNIT_TEST(Test_mm_set1_epi16);  +    UNIT_TEST(Test_mm_set1_epi32);  +    UNIT_TEST(Test_mm_set1_ps);       UNIT_TEST(Test_mm_set_ps1); - -    UNIT_TEST(Test_mm_setzero_si128); -    UNIT_TEST(Test_mm_setzero_ps); +  +    UNIT_TEST(Test_mm_setzero_si128);  +    UNIT_TEST(Test_mm_setzero_ps);       UNIT_TEST(Test_mm_setzero_pd); - -    UNIT_TEST(Test_mm_storel_epi64); -    UNIT_TEST(Test_mm_loadl_epi64); - +  +    UNIT_TEST(Test_mm_storel_epi64);  +    UNIT_TEST(Test_mm_loadl_epi64);  +     
  UNIT_TEST(Test_mm_loadl_pd);      UNIT_TEST(Test_mm_loadh_pd);      UNIT_TEST(Test_mm_cvtsd_f64); -    UNIT_TEST(Test_mm_shuffle_epi32); -    UNIT_TEST(Test_mm_movemask_epi8); -    UNIT_TEST(Test_mm_cvtsi128_si32); +    UNIT_TEST(Test_mm_shuffle_epi32);  +    UNIT_TEST(Test_mm_movemask_epi8);  +    UNIT_TEST(Test_mm_cvtsi128_si32);       UNIT_TEST(Test_mm_cvtsi128_si64); - -    UNIT_TEST(Test_mm_set_epi16); -    UNIT_TEST(Test_mm_set_epi32); -    UNIT_TEST(Test_mm_set_ps); +  +    UNIT_TEST(Test_mm_set_epi16);  +    UNIT_TEST(Test_mm_set_epi32);  +    UNIT_TEST(Test_mm_set_ps);       UNIT_TEST(Test_mm_set_pd); - -    UNIT_TEST(Test_mm_cvtsi32_si128); +  +    UNIT_TEST(Test_mm_cvtsi32_si128);       UNIT_TEST(Test_mm_cvtsi64_si128); - -    UNIT_TEST(Test_mm_packs_epi16); -    UNIT_TEST(Test_mm_packs_epi32); -    UNIT_TEST(Test_mm_packus_epi16); - -    UNIT_TEST(Test_mm_extract_epi16); +  +    UNIT_TEST(Test_mm_packs_epi16);  +    UNIT_TEST(Test_mm_packs_epi32);  +    UNIT_TEST(Test_mm_packus_epi16);  +  +    UNIT_TEST(Test_mm_extract_epi16);       UNIT_TEST(Test_mm_extract_epi8);      UNIT_TEST(Test_mm_extract_epi32);      UNIT_TEST(Test_mm_extract_epi64); - -    UNIT_TEST(Test_MM_TRANSPOSE4_PS); -    UNIT_TEST(Test_mm_movemask_ps); +  +    UNIT_TEST(Test_MM_TRANSPOSE4_PS);  +    UNIT_TEST(Test_mm_movemask_ps);       UNIT_TEST(Test_mm_movemask_ps_2); - -    UNIT_TEST(Test_mm_cvtepi32_ps); -    UNIT_TEST(Test_mm_cvtps_epi32); -    UNIT_TEST(Test_mm_cvttps_epi32); - -    UNIT_TEST(Test_mm_castsi128_ps); -    UNIT_TEST(Test_mm_castps_si128); - -    UNIT_TEST(Test_mm_mul_epu32); - +  +    UNIT_TEST(Test_mm_cvtepi32_ps);  +    UNIT_TEST(Test_mm_cvtps_epi32);  +    UNIT_TEST(Test_mm_cvttps_epi32);  +  +    UNIT_TEST(Test_mm_castsi128_ps);  +    UNIT_TEST(Test_mm_castps_si128);  +  +    UNIT_TEST(Test_mm_mul_epu32);  +       UNIT_TEST(Test_mm_cmpunord_ps);      UNIT_TEST(Test_mm_andnot_ps);      UNIT_TEST(Test_mm_shuffle_ps); @@ -310,36 +310,36 @@ private:      
UNIT_TEST(Test_mm_rsqrt_ps);      UNIT_TEST(Test_matrixnet_powerpc); -    UNIT_TEST_SUITE_END(); - -public: -    void Test_mm_load_si128(); -    void Test_mm_loadu_si128(); +    UNIT_TEST_SUITE_END();  +  +public:  +    void Test_mm_load_si128();  +    void Test_mm_loadu_si128();       void Test_mm_storeu_si128();      void Test_mm_loadu_si128_2();      void Test_mm_loadu_ps();      void Test_mm_storeu_ps(); - -    template <typename TElem, int bits, int elemCount, +  +    template <typename TElem, int bits, int elemCount,                 typename TFunc, typename TShifter, typename TOp, typename TElemFunc> -    void Test_mm_shifter_epiXX(); - +    void Test_mm_shifter_epiXX();  +       enum class EDirection {          Left,          Right      }; - +       struct TShiftRes {          __m128i Value[17];      };      void Test_mm_byte_shifter(EDirection direction, std::function<TShiftRes (__m128i)> foo); -    void Test_mm_slli_epi16(); -    void Test_mm_slli_epi32(); -    void Test_mm_slli_epi64(); +    void Test_mm_slli_epi16();  +    void Test_mm_slli_epi32();  +    void Test_mm_slli_epi64();       void Test_mm_slli_si128(); - +       void Test_mm_srli_epi16();      void Test_mm_srli_epi32();      void Test_mm_srli_epi64(); @@ -356,134 +356,134 @@ public:      void Test_mm_srl_epi32();      void Test_mm_srl_epi64(); -    void Test_mm_add_epi8(); -    void Test_mm_add_epi16(); -    void Test_mm_add_epi32(); -    void Test_mm_add_epi64(); -    void Test_mm_add_ps(); +    void Test_mm_add_epi8();  +    void Test_mm_add_epi16();  +    void Test_mm_add_epi32();  +    void Test_mm_add_epi64();  +    void Test_mm_add_ps();       void Test_mm_add_pd(); - +       void Test_mm_madd_epi16(); -    void Test_mm_sub_epi8(); -    void Test_mm_sub_epi16(); -    void Test_mm_sub_epi32(); -    void Test_mm_sub_epi64(); -    void Test_mm_sub_ps(); +    void Test_mm_sub_epi8();  +    void Test_mm_sub_epi16();  +    void Test_mm_sub_epi32();  +    void Test_mm_sub_epi64();  +    void 
Test_mm_sub_ps();       void Test_mm_sub_pd(); - -    void Test_mm_mul_ps(); +  +    void Test_mm_mul_ps();       void Test_mm_mul_pd(); -    void Test_mm_div_ps(); +    void Test_mm_div_ps();       void Test_mm_div_pd(); -    void Test_mm_max_ps(); -    void Test_mm_min_ps(); -    void Test_mm_and_ps(); - -    template <typename TElem, int bits, int elemCount, int shift, -              typename TFunc, typename TOp> -    void Test_mm_unpack_epiXX(); -    void Test_mm_unpacklo_epi8(); -    void Test_mm_unpackhi_epi8(); -    void Test_mm_unpacklo_epi16(); -    void Test_mm_unpackhi_epi16(); -    void Test_mm_unpacklo_epi32(); -    void Test_mm_unpackhi_epi32(); -    void Test_mm_unpacklo_epi64(); -    void Test_mm_unpackhi_epi64(); - -    template <typename TElem, unsigned elemCount, -              typename TFunc, typename TElemFunc, +    void Test_mm_max_ps();  +    void Test_mm_min_ps();  +    void Test_mm_and_ps();  +  +    template <typename TElem, int bits, int elemCount, int shift,  +              typename TFunc, typename TOp>  +    void Test_mm_unpack_epiXX();  +    void Test_mm_unpacklo_epi8();  +    void Test_mm_unpackhi_epi8();  +    void Test_mm_unpacklo_epi16();  +    void Test_mm_unpackhi_epi16();  +    void Test_mm_unpacklo_epi32();  +    void Test_mm_unpackhi_epi32();  +    void Test_mm_unpacklo_epi64();  +    void Test_mm_unpackhi_epi64();  +  +    template <typename TElem, unsigned elemCount,  +              typename TFunc, typename TElemFunc,                 typename TOp, typename TVectorType = __m128i> -    void Test_mm_dualop(); - -    template <typename TElem, unsigned elemCount, -              typename TFunc, typename TElemFunc, +    void Test_mm_dualop();  +  +    template <typename TElem, unsigned elemCount,  +              typename TFunc, typename TElemFunc,                 typename TOp, typename TVectorType = __m128i> -    void Test_mm_dualcmp(); - -    void Test_mm_or_si128(); -    void Test_mm_and_si128(); -    void Test_mm_andnot_si128(); 
- -    void Test_mm_cmpeq_epi8(); -    void Test_mm_cmpeq_epi16(); -    void Test_mm_cmpeq_epi32(); -    void Test_mm_cmpeq_ps(); - -    void Test_mm_cmpgt_epi8(); -    void Test_mm_cmpgt_epi16(); -    void Test_mm_cmpgt_epi32(); -    void Test_mm_cmpgt_ps(); - -    void Test_mm_cmplt_epi8(); -    void Test_mm_cmplt_epi16(); -    void Test_mm_cmplt_epi32(); - -    template <typename TElem, int elemCount, +    void Test_mm_dualcmp();  +  +    void Test_mm_or_si128();  +    void Test_mm_and_si128();  +    void Test_mm_andnot_si128();  +  +    void Test_mm_cmpeq_epi8();  +    void Test_mm_cmpeq_epi16();  +    void Test_mm_cmpeq_epi32();  +    void Test_mm_cmpeq_ps();  +  +    void Test_mm_cmpgt_epi8();  +    void Test_mm_cmpgt_epi16();  +    void Test_mm_cmpgt_epi32();  +    void Test_mm_cmpgt_ps();  +  +    void Test_mm_cmplt_epi8();  +    void Test_mm_cmplt_epi16();  +    void Test_mm_cmplt_epi32();  +  +    template <typename TElem, int elemCount,                 typename TFunc, typename TOp, typename TVectorType> -    void Test_mm_setter_epiXX(); -    void Test_mm_set1_epi8(); -    void Test_mm_set1_epi16(); -    void Test_mm_set1_epi32(); -    void Test_mm_set1_ps(); +    void Test_mm_setter_epiXX();  +    void Test_mm_set1_epi8();  +    void Test_mm_set1_epi16();  +    void Test_mm_set1_epi32();  +    void Test_mm_set1_ps();       void Test_mm_set_ps1(); - -    void Test_mm_setzero_si128(); -    void Test_mm_setzero_ps(); +  +    void Test_mm_setzero_si128();  +    void Test_mm_setzero_ps();       void Test_mm_setzero_pd(); - -    void Test_mm_loadl_epi64(); -    void Test_mm_storel_epi64(); - +  +    void Test_mm_loadl_epi64();  +    void Test_mm_storel_epi64();  +       void Test_mm_loadl_pd();      void Test_mm_loadh_pd();      void Test_mm_cvtsd_f64(); -    void Test_mm_shuffle_epi32(); -    void Test_mm_movemask_epi8(); -    void Test_mm_cvtsi128_si32(); +    void Test_mm_shuffle_epi32();  +    void Test_mm_movemask_epi8();  +    void 
Test_mm_cvtsi128_si32();       void Test_mm_cvtsi128_si64(); - -    void Test_mm_set_epi16(); -    void Test_mm_set_epi32(); -    void Test_mm_set_ps(); +  +    void Test_mm_set_epi16();  +    void Test_mm_set_epi32();  +    void Test_mm_set_ps();       void Test_mm_set_pd(); - -    void Test_mm_cvtsi32_si128(); +  +    void Test_mm_cvtsi32_si128();       void Test_mm_cvtsi64_si128(); - -    template <typename TElem, typename TNarrow, unsigned elemCount, -              typename TFunc> -    void Test_mm_packs_epiXX(); -    void Test_mm_packs_epi16(); -    void Test_mm_packs_epi32(); -    void Test_mm_packus_epi16(); - -    void Test_mm_extract_epi16(); +  +    template <typename TElem, typename TNarrow, unsigned elemCount,  +              typename TFunc>  +    void Test_mm_packs_epiXX();  +    void Test_mm_packs_epi16();  +    void Test_mm_packs_epi32();  +    void Test_mm_packus_epi16();  +  +    void Test_mm_extract_epi16();       void Test_mm_extract_epi8();      void Test_mm_extract_epi32();      void Test_mm_extract_epi64(); - -    void Test_MM_TRANSPOSE4_PS(); -    void Test_mm_movemask_ps(); +  +    void Test_MM_TRANSPOSE4_PS();  +    void Test_mm_movemask_ps();       void Test_mm_movemask_ps_2(); - -    template <typename TFrom, typename TTo, unsigned elemCount, -              typename TLoadVector, typename TResultVector, -              typename TElemFunc, typename TFunc, typename TOp> -    void Test_mm_convertop(); -    void Test_mm_cvtepi32_ps(); -    void Test_mm_cvtps_epi32(); -    void Test_mm_cvttps_epi32(); - -    template <typename TLoadVector, typename TCastVector, -              typename TFunc, TFunc* func> -    void Test_mm_castXX(); -    void Test_mm_castsi128_ps(); -    void Test_mm_castps_si128(); - -    void Test_mm_mul_epu32(); +  +    template <typename TFrom, typename TTo, unsigned elemCount,  +              typename TLoadVector, typename TResultVector,  +              typename TElemFunc, typename TFunc, typename TOp>  +    void 
Test_mm_convertop();  +    void Test_mm_cvtepi32_ps();  +    void Test_mm_cvtps_epi32();  +    void Test_mm_cvttps_epi32();  +  +    template <typename TLoadVector, typename TCastVector,  +              typename TFunc, TFunc* func>  +    void Test_mm_castXX();  +    void Test_mm_castsi128_ps();  +    void Test_mm_castps_si128();  +  +    void Test_mm_mul_epu32();       void Test_mm_cmpunord_ps();      void Test_mm_store_ss(); @@ -497,30 +497,30 @@ public:      void Test_mm_rsqrt_ps();      void Test_mm_rsqrt_ss();      void Test_matrixnet_powerpc(); -}; - -UNIT_TEST_SUITE_REGISTRATION(TSSEEmulTest); - -void TSSEEmulTest::Test_mm_load_si128() { +};  +  +UNIT_TEST_SUITE_REGISTRATION(TSSEEmulTest);  +  +void TSSEEmulTest::Test_mm_load_si128() {       alignas(16) char data[16] = { -        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', -        '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; -    __m128i value = _mm_load_si128((__m128i*)&data); -    UNIT_ASSERT_EQUAL(TQType<uint64x2_t>::As(value)[0], 0xAABB2211CCFF00AAUL); -    UNIT_ASSERT_EQUAL(TQType<uint64x2_t>::As(value)[1], 0x1C66775588449933UL); -} - -void TSSEEmulTest::Test_mm_loadu_si128() { +        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA',  +        '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'};  +    __m128i value = _mm_load_si128((__m128i*)&data);  +    UNIT_ASSERT_EQUAL(TQType<uint64x2_t>::As(value)[0], 0xAABB2211CCFF00AAUL);  +    UNIT_ASSERT_EQUAL(TQType<uint64x2_t>::As(value)[1], 0x1C66775588449933UL);  +}  +  +void TSSEEmulTest::Test_mm_loadu_si128() {       alignas(16) char data[17] = { -        '\x66', -        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', -        '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; -    UNIT_ASSERT((ui64(&data[1]) & 0x1) == 0x1); -    __m128i value = _mm_loadu_si128((__m128i*)&data[1]); -    UNIT_ASSERT(TQType<uint64x2_t>::As(value)[0] == 0xAABB2211CCFF00AAUL); - 
   UNIT_ASSERT(TQType<uint64x2_t>::As(value)[1] == 0x1C66775588449933UL); -} - +        '\x66',  +        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA',  +        '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'};  +    UNIT_ASSERT((ui64(&data[1]) & 0x1) == 0x1);  +    __m128i value = _mm_loadu_si128((__m128i*)&data[1]);  +    UNIT_ASSERT(TQType<uint64x2_t>::As(value)[0] == 0xAABB2211CCFF00AAUL);  +    UNIT_ASSERT(TQType<uint64x2_t>::As(value)[1] == 0x1C66775588449933UL);  +}  +   void TSSEEmulTest::Test_mm_storeu_si128() {      alignas(16) unsigned char stub[32] = {          0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, @@ -640,32 +640,32 @@ unsigned MakeNumber<unsigned>(unsigned number) {      return number;  } -template <typename TElem, int bits, int elemCount, +template <typename TElem, int bits, int elemCount,             typename TFunc, typename TShifter, typename TOp, typename TElemFunc> -void TSSEEmulTest::Test_mm_shifter_epiXX() { -    char data[16] = { -        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', -        '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; -    TElem* dataw = reinterpret_cast<TElem*>(&data); - -    __m128i value = _mm_loadu_si128((__m128i*)&data); - +void TSSEEmulTest::Test_mm_shifter_epiXX() {  +    char data[16] = {  +        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA',  +        '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'};  +    TElem* dataw = reinterpret_cast<TElem*>(&data);  +  +    __m128i value = _mm_loadu_si128((__m128i*)&data);  +       for (unsigned shifter = 0; shifter <= bits; ++shifter) { -        TElem shiftedData[elemCount]; +        TElem shiftedData[elemCount];           for (unsigned i = 0; i < elemCount; ++i) { -            shiftedData[i] = TElemFunc::Call(dataw[i], shifter); +            shiftedData[i] = TElemFunc::Call(dataw[i], shifter);           } - +           const TShifter adhoc_shifter = 
MakeNumber<TShifter>(shifter);          __m128i result = TFunc(value, adhoc_shifter);          for (unsigned i = 0; i < elemCount; ++i) { -            UNIT_ASSERT_EQUAL(shiftedData[i], TQType<TOp>::As(result)[i]); +            UNIT_ASSERT_EQUAL(shiftedData[i], TQType<TOp>::As(result)[i]);           } -    } -} - +    }  +}  +   void TSSEEmulTest::Test_mm_byte_shifter(EDirection direction, std::function<TShiftRes (__m128i)> foo) {      const char data[48] = { @@ -713,52 +713,52 @@ struct THelperASHR {      }  }; -template <typename TElem> -struct THelperSHR { -    static TElem Call(const TElem op, const int shift) { +template <typename TElem>  +struct THelperSHR {  +    static TElem Call(const TElem op, const int shift) {           constexpr int nBitsInOp = sizeof(op) * CHAR_BIT;          return shift < nBitsInOp ? op >> shift : 0; -    } -}; - -void TSSEEmulTest::Test_mm_srli_epi16() { +    }  +};  +  +void TSSEEmulTest::Test_mm_srli_epi16() {       Test_mm_shifter_epiXX<ui16, 16, 8, Wrap(_mm_srli_epi16), unsigned, uint16x8_t, -                          THelperSHR<ui16>>(); -} - -void TSSEEmulTest::Test_mm_srli_epi32() { +                          THelperSHR<ui16>>();  +}  +  +void TSSEEmulTest::Test_mm_srli_epi32() {       Test_mm_shifter_epiXX<ui32, 32, 4, Wrap(_mm_srli_epi32), unsigned, uint32x4_t, -                          THelperSHR<ui32>>(); -} - -void TSSEEmulTest::Test_mm_srli_epi64() { +                          THelperSHR<ui32>>();  +}  +  +void TSSEEmulTest::Test_mm_srli_epi64() {       Test_mm_shifter_epiXX<ui64, 64, 2, Wrap(_mm_srli_epi64), unsigned, uint64x2_t, -                          THelperSHR<ui64>>(); -} - -template <typename TElem> -struct THelperSHL { -    static TElem Call(const TElem op, const int shift) { +                          THelperSHR<ui64>>();  +}  +  +template <typename TElem>  +struct THelperSHL {  +    static TElem Call(const TElem op, const int shift) {           constexpr int nBitsInOp = sizeof(op) * CHAR_BIT;          
return shift < nBitsInOp ? op << shift : 0; -    } -}; - -void TSSEEmulTest::Test_mm_slli_epi16() { +    }  +};  +  +void TSSEEmulTest::Test_mm_slli_epi16() {       Test_mm_shifter_epiXX<ui16, 16, 8, Wrap(_mm_slli_epi16), unsigned, uint16x8_t, -                          THelperSHL<ui16>>(); -} - -void TSSEEmulTest::Test_mm_slli_epi32() { +                          THelperSHL<ui16>>();  +}  +  +void TSSEEmulTest::Test_mm_slli_epi32() {       Test_mm_shifter_epiXX<ui32, 32, 4, Wrap(_mm_slli_epi32), unsigned, uint32x4_t, -                          THelperSHL<ui32>>(); -} - -void TSSEEmulTest::Test_mm_slli_epi64() { +                          THelperSHL<ui32>>();  +}  +  +void TSSEEmulTest::Test_mm_slli_epi64() {       Test_mm_shifter_epiXX<ui64, 64, 2, Wrap(_mm_slli_epi64), unsigned, uint64x2_t, -                          THelperSHL<ui64>>(); -} - +                          THelperSHL<ui64>>();  +}  +   void TSSEEmulTest::Test_mm_slli_si128() {      Test_mm_byte_shifter(EDirection::Left, [] (__m128i a) -> TShiftRes {          TShiftRes res; @@ -849,30 +849,30 @@ void TSSEEmulTest::Test_mm_sll_epi64() {                            THelperSHL<ui64>>();  } -template <typename TElem> -struct THelperAdd { -    static TElem Call(const TElem op1, const TElem op2) { -        return op1 + op2; -    } -}; - -void TSSEEmulTest::Test_mm_add_epi16() { -    Test_mm_dualop<ui16, 8, Wrap(_mm_add_epi16), THelperAdd<ui16>, uint16x8_t>(); -} - -void TSSEEmulTest::Test_mm_add_epi32() { -    Test_mm_dualop<ui32, 4, Wrap(_mm_add_epi32), THelperAdd<ui32>, uint32x4_t>(); -} - -void TSSEEmulTest::Test_mm_add_epi64() { -    Test_mm_dualop<ui64, 2, Wrap(_mm_add_epi64), THelperAdd<ui64>, uint64x2_t>(); -} - -void TSSEEmulTest::Test_mm_add_ps() { -    Test_mm_dualop<float, 2, WrapF(_mm_add_ps), -                   THelperAdd<float>, float32x4_t, __m128>(); -} - +template <typename TElem>  +struct THelperAdd {  +    static TElem Call(const TElem op1, const TElem op2) {  +        return op1 + op2;  
+    }  +};  +  +void TSSEEmulTest::Test_mm_add_epi16() {  +    Test_mm_dualop<ui16, 8, Wrap(_mm_add_epi16), THelperAdd<ui16>, uint16x8_t>();  +}  +  +void TSSEEmulTest::Test_mm_add_epi32() {  +    Test_mm_dualop<ui32, 4, Wrap(_mm_add_epi32), THelperAdd<ui32>, uint32x4_t>();  +}  +  +void TSSEEmulTest::Test_mm_add_epi64() {  +    Test_mm_dualop<ui64, 2, Wrap(_mm_add_epi64), THelperAdd<ui64>, uint64x2_t>();  +}  +  +void TSSEEmulTest::Test_mm_add_ps() {  +    Test_mm_dualop<float, 2, WrapF(_mm_add_ps),  +                   THelperAdd<float>, float32x4_t, __m128>();  +}  +   void TSSEEmulTest::Test_mm_add_pd() {      Test_mm_dualop<double, 2, WrapD(_mm_add_pd),                     THelperAdd<double>, float64x2_t, __m128d>(); @@ -904,44 +904,44 @@ void TSSEEmulTest::Test_mm_madd_epi16() {  } -template <typename TElem> -struct THelperSub { -    static TElem Call(const TElem op1, const TElem op2) { -        return op1 - op2; -    } -}; - -void TSSEEmulTest::Test_mm_sub_epi16() { -    Test_mm_dualop<ui16, 8, Wrap(_mm_sub_epi16), THelperSub<ui16>, uint16x8_t>(); -} - -void TSSEEmulTest::Test_mm_sub_epi32() { -    Test_mm_dualop<ui32, 4, Wrap(_mm_sub_epi32), THelperSub<ui32>, uint32x4_t>(); -} - -void TSSEEmulTest::Test_mm_sub_epi64() { -    Test_mm_dualop<ui64, 2, Wrap(_mm_sub_epi64), THelperSub<ui64>, uint64x2_t>(); -} - -void TSSEEmulTest::Test_mm_sub_ps() { -    Test_mm_dualop<float, 4, WrapF(_mm_sub_ps), THelperSub<float>, -                   float32x4_t, __m128>(); -} - +template <typename TElem>  +struct THelperSub {  +    static TElem Call(const TElem op1, const TElem op2) {  +        return op1 - op2;  +    }  +};  +  +void TSSEEmulTest::Test_mm_sub_epi16() {  +    Test_mm_dualop<ui16, 8, Wrap(_mm_sub_epi16), THelperSub<ui16>, uint16x8_t>();  +}  +  +void TSSEEmulTest::Test_mm_sub_epi32() {  +    Test_mm_dualop<ui32, 4, Wrap(_mm_sub_epi32), THelperSub<ui32>, uint32x4_t>();  +}  +  +void TSSEEmulTest::Test_mm_sub_epi64() {  +    Test_mm_dualop<ui64, 2, 
Wrap(_mm_sub_epi64), THelperSub<ui64>, uint64x2_t>();  +}  +  +void TSSEEmulTest::Test_mm_sub_ps() {  +    Test_mm_dualop<float, 4, WrapF(_mm_sub_ps), THelperSub<float>,  +                   float32x4_t, __m128>();  +}  +   void TSSEEmulTest::Test_mm_sub_pd() {      Test_mm_dualop<double, 2, WrapD(_mm_sub_pd), THelperSub<double>,                     float64x2_t, __m128d>();  } -void TSSEEmulTest::Test_mm_mul_ps() { -    struct THelper { -        static float Call(const float op1, const float op2) { -            return op1 * op2; -        } -    }; -    Test_mm_dualop<float, 4, WrapF(_mm_mul_ps), THelper, float32x4_t, __m128>(); -} - +void TSSEEmulTest::Test_mm_mul_ps() {  +    struct THelper {  +        static float Call(const float op1, const float op2) {  +            return op1 * op2;  +        }  +    };  +    Test_mm_dualop<float, 4, WrapF(_mm_mul_ps), THelper, float32x4_t, __m128>();  +}  +   void TSSEEmulTest::Test_mm_mul_pd() {      struct THelper {          static double Call(const double op1, const double op2) { @@ -951,15 +951,15 @@ void TSSEEmulTest::Test_mm_mul_pd() {      Test_mm_dualop<double, 2, WrapD(_mm_mul_pd), THelper, float64x2_t, __m128d>();  } -void TSSEEmulTest::Test_mm_div_ps() { -    struct THelper { -        static float Call(const float op1, const float op2) { -            return op1 / op2; -        } -    }; -    Test_mm_dualop<float, 4, WrapF(_mm_div_ps), THelper, float32x4_t, __m128>(); -} - +void TSSEEmulTest::Test_mm_div_ps() {  +    struct THelper {  +        static float Call(const float op1, const float op2) {  +            return op1 / op2;  +        }  +    };  +    Test_mm_dualop<float, 4, WrapF(_mm_div_ps), THelper, float32x4_t, __m128>();  +}  +   void TSSEEmulTest::Test_mm_div_pd() {      struct THelper {          static double Call(const double op1, const double op2) { @@ -969,441 +969,441 @@ void TSSEEmulTest::Test_mm_div_pd() {      Test_mm_dualop<double, 2, WrapD(_mm_div_pd), THelper, float64x2_t, __m128d>();  } -void 
TSSEEmulTest::Test_mm_max_ps() { -    struct THelper { -        static float Call(const float op1, const float op2) { -            return std::max(op1, op2); -        } -    }; -    Test_mm_dualop<float, 4, WrapF(_mm_max_ps), THelper, float32x4_t, __m128>(); -} - -void TSSEEmulTest::Test_mm_min_ps() { -    struct THelper { -        static float Call(const float op1, const float op2) { -            return std::min(op1, op2); -        } -    }; -    Test_mm_dualop<float, 4, WrapF(_mm_min_ps), THelper, float32x4_t, __m128>(); -} - -void TSSEEmulTest::Test_mm_and_ps() { -    struct THelper { -        static float Call(const float op1, const float op2) { -            union Cast { -                unsigned int AsUInt; -                float AsFloat; -            }; -            Cast v1, v2, result; -            v1.AsFloat = op1; -            v2.AsFloat = op2; -            result.AsUInt = v1.AsUInt & v2.AsUInt; -            return result.AsFloat; -        } -    }; -    Test_mm_dualcmp<float, 4, WrapF(_mm_and_ps), -                    THelper, float32x4_t, __m128>(); -} - -template <typename TElem, int bits, int elemCount, int shift, -          typename TFunc, typename TOp> -void TSSEEmulTest::Test_mm_unpack_epiXX() { -    char data1[16] = { -        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', -        '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; -    char data2[16] = { -        '\x99', '\x33', '\x1C', '\x55', '\x88', '\x66', '\x77', '\x44', -        '\x00', '\xAA', '\xAA', '\x11', '\xCC', '\xBB', '\x22', '\xFF'}; -    TElem* dataw1 = reinterpret_cast<TElem*>(&data1); -    TElem* dataw2 = reinterpret_cast<TElem*>(&data2); - -    __m128i value1 = _mm_loadu_si128((__m128i*)&data1); -    __m128i value2 = _mm_loadu_si128((__m128i*)&data2); - -    TElem zippedData[elemCount]; -    for (unsigned i = 0; i < elemCount / 2; ++i) { -        zippedData[i * 2] = dataw1[i + shift]; -        zippedData[i * 2 + 1] = dataw2[i + shift]; -    } -    
__m128i result = TFunc(value1, value2); - -    for (unsigned i = 0; i < elemCount / 2; ++i) { -        UNIT_ASSERT_EQUAL(zippedData[i * 2], TQType<TOp>::As(result)[i * 2]); -        UNIT_ASSERT_EQUAL(zippedData[i * 2 + 1], -                          TQType<TOp>::As(result)[i * 2 + 1]); -    } -} - -void TSSEEmulTest::Test_mm_unpacklo_epi8() { -    Test_mm_unpack_epiXX<ui8, 8, 16, 0, Wrap(_mm_unpacklo_epi8), uint8x16_t>(); -} - -void TSSEEmulTest::Test_mm_unpackhi_epi8() { -    Test_mm_unpack_epiXX<ui8, 8, 16, 8, Wrap(_mm_unpackhi_epi8), uint8x16_t>(); -} - -void TSSEEmulTest::Test_mm_unpacklo_epi16() { -    Test_mm_unpack_epiXX<ui16, 16, 8, 0, Wrap(_mm_unpacklo_epi16), uint16x8_t>(); -} - -void TSSEEmulTest::Test_mm_unpackhi_epi16() { -    Test_mm_unpack_epiXX<ui16, 16, 8, 4, Wrap(_mm_unpackhi_epi16), uint16x8_t>(); -} - -void TSSEEmulTest::Test_mm_unpacklo_epi32() { -    Test_mm_unpack_epiXX<ui32, 32, 4, 0, Wrap(_mm_unpacklo_epi32), uint32x4_t>(); -} - -void TSSEEmulTest::Test_mm_unpackhi_epi32() { -    Test_mm_unpack_epiXX<ui32, 32, 4, 2, Wrap(_mm_unpackhi_epi32), uint32x4_t>(); -} - -void TSSEEmulTest::Test_mm_unpacklo_epi64() { -    Test_mm_unpack_epiXX<ui64, 64, 2, 0, Wrap(_mm_unpacklo_epi64), uint64x2_t>(); -} - -void TSSEEmulTest::Test_mm_unpackhi_epi64() { -    Test_mm_unpack_epiXX<ui64, 64, 2, 1, Wrap(_mm_unpackhi_epi64), uint64x2_t>(); -} - -template <typename TElem, unsigned elemCount, -          typename TFunc, typename TElemFunc, +void TSSEEmulTest::Test_mm_max_ps() {  +    struct THelper {  +        static float Call(const float op1, const float op2) {  +            return std::max(op1, op2);  +        }  +    };  +    Test_mm_dualop<float, 4, WrapF(_mm_max_ps), THelper, float32x4_t, __m128>();  +}  +  +void TSSEEmulTest::Test_mm_min_ps() {  +    struct THelper {  +        static float Call(const float op1, const float op2) {  +            return std::min(op1, op2);  +        }  +    };  +    Test_mm_dualop<float, 4, WrapF(_mm_min_ps), THelper, 
float32x4_t, __m128>();  +}  +  +void TSSEEmulTest::Test_mm_and_ps() {  +    struct THelper {  +        static float Call(const float op1, const float op2) {  +            union Cast {  +                unsigned int AsUInt;  +                float AsFloat;  +            };  +            Cast v1, v2, result;  +            v1.AsFloat = op1;  +            v2.AsFloat = op2;  +            result.AsUInt = v1.AsUInt & v2.AsUInt;  +            return result.AsFloat;  +        }  +    };  +    Test_mm_dualcmp<float, 4, WrapF(_mm_and_ps),  +                    THelper, float32x4_t, __m128>();  +}  +  +template <typename TElem, int bits, int elemCount, int shift,  +          typename TFunc, typename TOp>  +void TSSEEmulTest::Test_mm_unpack_epiXX() {  +    char data1[16] = {  +        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA',  +        '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'};  +    char data2[16] = {  +        '\x99', '\x33', '\x1C', '\x55', '\x88', '\x66', '\x77', '\x44',  +        '\x00', '\xAA', '\xAA', '\x11', '\xCC', '\xBB', '\x22', '\xFF'};  +    TElem* dataw1 = reinterpret_cast<TElem*>(&data1);  +    TElem* dataw2 = reinterpret_cast<TElem*>(&data2);  +  +    __m128i value1 = _mm_loadu_si128((__m128i*)&data1);  +    __m128i value2 = _mm_loadu_si128((__m128i*)&data2);  +  +    TElem zippedData[elemCount];  +    for (unsigned i = 0; i < elemCount / 2; ++i) {  +        zippedData[i * 2] = dataw1[i + shift];  +        zippedData[i * 2 + 1] = dataw2[i + shift];  +    }  +    __m128i result = TFunc(value1, value2);  +  +    for (unsigned i = 0; i < elemCount / 2; ++i) {  +        UNIT_ASSERT_EQUAL(zippedData[i * 2], TQType<TOp>::As(result)[i * 2]);  +        UNIT_ASSERT_EQUAL(zippedData[i * 2 + 1],  +                          TQType<TOp>::As(result)[i * 2 + 1]);  +    }  +}  +  +void TSSEEmulTest::Test_mm_unpacklo_epi8() {  +    Test_mm_unpack_epiXX<ui8, 8, 16, 0, Wrap(_mm_unpacklo_epi8), uint8x16_t>();  +}  +  +void 
TSSEEmulTest::Test_mm_unpackhi_epi8() {  +    Test_mm_unpack_epiXX<ui8, 8, 16, 8, Wrap(_mm_unpackhi_epi8), uint8x16_t>();  +}  +  +void TSSEEmulTest::Test_mm_unpacklo_epi16() {  +    Test_mm_unpack_epiXX<ui16, 16, 8, 0, Wrap(_mm_unpacklo_epi16), uint16x8_t>();  +}  +  +void TSSEEmulTest::Test_mm_unpackhi_epi16() {  +    Test_mm_unpack_epiXX<ui16, 16, 8, 4, Wrap(_mm_unpackhi_epi16), uint16x8_t>();  +}  +  +void TSSEEmulTest::Test_mm_unpacklo_epi32() {  +    Test_mm_unpack_epiXX<ui32, 32, 4, 0, Wrap(_mm_unpacklo_epi32), uint32x4_t>();  +}  +  +void TSSEEmulTest::Test_mm_unpackhi_epi32() {  +    Test_mm_unpack_epiXX<ui32, 32, 4, 2, Wrap(_mm_unpackhi_epi32), uint32x4_t>();  +}  +  +void TSSEEmulTest::Test_mm_unpacklo_epi64() {  +    Test_mm_unpack_epiXX<ui64, 64, 2, 0, Wrap(_mm_unpacklo_epi64), uint64x2_t>();  +}  +  +void TSSEEmulTest::Test_mm_unpackhi_epi64() {  +    Test_mm_unpack_epiXX<ui64, 64, 2, 1, Wrap(_mm_unpackhi_epi64), uint64x2_t>();  +}  +  +template <typename TElem, unsigned elemCount,  +          typename TFunc, typename TElemFunc,             typename TOp, typename TVectorType> -void TSSEEmulTest::Test_mm_dualop() { -    char data1[16] = { -        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', -        '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; -    char data2[16] = { -        '\x99', '\x33', '\x1C', '\x55', '\x88', '\x66', '\x77', '\x44', -        '\x00', '\xAA', '\xAA', '\x11', '\xCC', '\xBB', '\x22', '\xFF'}; -    TElem* dataw1 = reinterpret_cast<TElem*>(&data1); -    TElem* dataw2 = reinterpret_cast<TElem*>(&data2); - +void TSSEEmulTest::Test_mm_dualop() {  +    char data1[16] = {  +        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA',  +        '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'};  +    char data2[16] = {  +        '\x99', '\x33', '\x1C', '\x55', '\x88', '\x66', '\x77', '\x44',  +        '\x00', '\xAA', '\xAA', '\x11', '\xCC', '\xBB', '\x22', '\xFF'};  +    TElem* 
dataw1 = reinterpret_cast<TElem*>(&data1);  +    TElem* dataw2 = reinterpret_cast<TElem*>(&data2);  +       TVectorType value1 = TFuncLoad<TVectorType>(&data1);      TVectorType value2 = TFuncLoad<TVectorType>(&data2); - -    TElem procData[elemCount]; -    for (unsigned i = 0; i < elemCount; ++i) { -        procData[i] = TElemFunc::Call(dataw1[i], dataw2[i]); -    } +  +    TElem procData[elemCount];  +    for (unsigned i = 0; i < elemCount; ++i) {  +        procData[i] = TElemFunc::Call(dataw1[i], dataw2[i]);  +    }       TVectorType result = TFunc(value1, value2); - -    for (unsigned i = 0; i < elemCount; ++i) { -        UNIT_ASSERT_EQUAL(procData[i], TQType<TOp>::As(result)[i]); -    } -} - -/* This is almost the same as Test_mm_dualop, -   but different data1 and data2 */ -template <typename TElem, unsigned elemCount, -          typename TFunc, typename TElemFunc, +  +    for (unsigned i = 0; i < elemCount; ++i) {  +        UNIT_ASSERT_EQUAL(procData[i], TQType<TOp>::As(result)[i]);  +    }  +}  +  +/* This is almost the same as Test_mm_dualop,  +   but different data1 and data2 */  +template <typename TElem, unsigned elemCount,  +          typename TFunc, typename TElemFunc,             typename TOp, typename TVectorType> -void TSSEEmulTest::Test_mm_dualcmp() { -    char data1[16] = { -        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x66', '\x77', '\xAA', -        '\x33', '\x99', '\x44', '\x88', '\xCC', '\xBB', '\x66', '\x1C'}; -    char data2[16] = { -        '\x99', '\x33', '\xFF', '\xCC', '\x88', '\x66', '\x77', '\x44', -        '\x33', '\x99', '\x44', '\x88', '\xCC', '\xBB', '\x22', '\xFF'}; -    TElem* dataw1 = reinterpret_cast<TElem*>(&data1); -    TElem* dataw2 = reinterpret_cast<TElem*>(&data2); - +void TSSEEmulTest::Test_mm_dualcmp() {  +    char data1[16] = {  +        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x66', '\x77', '\xAA',  +        '\x33', '\x99', '\x44', '\x88', '\xCC', '\xBB', '\x66', '\x1C'};  +    char data2[16] = {  +        
'\x99', '\x33', '\xFF', '\xCC', '\x88', '\x66', '\x77', '\x44',  +        '\x33', '\x99', '\x44', '\x88', '\xCC', '\xBB', '\x22', '\xFF'};  +    TElem* dataw1 = reinterpret_cast<TElem*>(&data1);  +    TElem* dataw2 = reinterpret_cast<TElem*>(&data2);  +       TVectorType value1 = TFuncLoad<TVectorType>(&data1);      TVectorType value2 = TFuncLoad<TVectorType>(&data2); - -    TElem procData[elemCount]; -    for (unsigned i = 0; i < elemCount; ++i) { -        procData[i] = TElemFunc::Call(dataw1[i], dataw2[i]); -    } +  +    TElem procData[elemCount];  +    for (unsigned i = 0; i < elemCount; ++i) {  +        procData[i] = TElemFunc::Call(dataw1[i], dataw2[i]);  +    }       TVectorType result = TFunc(value1, value2); - -    for (unsigned i = 0; i < elemCount; ++i) { -        /* memcmp is for compare to invalid floats in results */ +  +    for (unsigned i = 0; i < elemCount; ++i) {  +        /* memcmp is for compare to invalid floats in results */           const TElem value = TQType<TOp>::As(result)[i];          UNIT_ASSERT(memcmp(&(procData[i]), &value, sizeof(TElem)) == 0); -    } -} - -void TSSEEmulTest::Test_mm_or_si128() { -    struct THelper { -        static ui64 Call(const ui64 op1, const ui64 op2) { -            return op1 | op2; -        } -    }; - -    Test_mm_dualop<ui64, 2, Wrap(_mm_or_si128), THelper, uint64x2_t>(); -} - -void TSSEEmulTest::Test_mm_and_si128() { -    struct THelper { -        static ui64 Call(const ui64 op1, const ui64 op2) { -            return op1 & op2; -        } -    }; - -    Test_mm_dualop<ui64, 2, Wrap(_mm_and_si128), THelper, uint64x2_t>(); -} - -void TSSEEmulTest::Test_mm_andnot_si128() { -    struct THelper { -        static ui64 Call(const ui64 op1, const ui64 op2) { -            return (~op1) & op2; -        } -    }; - -    Test_mm_dualop<ui64, 2, Wrap(_mm_andnot_si128), THelper, uint64x2_t>(); -} - -template <typename TElem> -struct THelperCMPEQ { -    static TElem Call(const TElem op1, const TElem op2) { -        
return op1 == op2 ? ~TElem(0) : TElem(0); -    } -}; - -void TSSEEmulTest::Test_mm_cmpeq_epi8() { -    Test_mm_dualcmp<ui8, 16, Wrap(_mm_cmpeq_epi8), -                    THelperCMPEQ<ui8>, uint8x16_t>(); -} - -void TSSEEmulTest::Test_mm_cmpeq_epi16() { -    Test_mm_dualcmp<ui16, 8, Wrap(_mm_cmpeq_epi16), -                    THelperCMPEQ<ui16>, uint16x8_t>(); -} - -void TSSEEmulTest::Test_mm_cmpeq_epi32() { -    Test_mm_dualcmp<ui32, 4, Wrap(_mm_cmpeq_epi32), -                    THelperCMPEQ<ui32>, uint32x4_t>(); -} - -void TSSEEmulTest::Test_mm_cmpeq_ps() { -    struct THelperFloat { -        static float Call(const float op1, const float op2) { -            union Cast { -                unsigned int AsUInt; -                float AsFloat; -            }; -            Cast value; -            value.AsUInt = op1 == op2 ? 0xFFFFFFFF : 0; -            return value.AsFloat; -        } -    }; - -    Test_mm_dualcmp<float, 4, WrapF(_mm_cmpeq_ps), -                    THelperFloat, float32x4_t, __m128>(); -} - -template <typename TElem> -struct THelperCMPGT { -    static TElem Call(const TElem op1, const TElem op2) { -        return op1 > op2 ? ~TElem(0) : TElem(0); -    } -}; - -void TSSEEmulTest::Test_mm_cmpgt_epi8() { -    Test_mm_dualcmp<i8, 16, Wrap(_mm_cmpgt_epi8), -                    THelperCMPGT<i8>, int8x16_t>(); -} - -void TSSEEmulTest::Test_mm_cmpgt_epi16() { -    Test_mm_dualcmp<i16, 8, Wrap(_mm_cmpgt_epi16), -                    THelperCMPGT<i16>, int16x8_t>(); -} - -void TSSEEmulTest::Test_mm_cmpgt_epi32() { -    Test_mm_dualcmp<i32, 4, Wrap(_mm_cmpgt_epi32), -                    THelperCMPGT<i32>, int32x4_t>(); -} - -void TSSEEmulTest::Test_mm_cmpgt_ps() { -    struct THelperFloat { -        static float Call(const float op1, const float op2) { -            union Cast { -                unsigned int AsUInt; -                float AsFloat; -            }; -            Cast value; -            value.AsUInt = op1 > op2 ? 
0xFFFFFFFF : 0; -            return value.AsFloat; -        } -    }; - -    Test_mm_dualcmp<float, 4, WrapF(_mm_cmpgt_ps), -                    THelperFloat, float32x4_t, __m128>(); -} - -template <typename TElem> -struct THelperCMPLT { -    static TElem Call(const TElem op1, const TElem op2) { -        return op1 < op2 ? ~TElem(0) : TElem(0); -    } -}; - -void TSSEEmulTest::Test_mm_cmplt_epi8() { -    Test_mm_dualcmp<i8, 16, Wrap(_mm_cmplt_epi8), -                    THelperCMPLT<i8>, int8x16_t>(); -} - -void TSSEEmulTest::Test_mm_cmplt_epi16() { -    Test_mm_dualcmp<i16, 8, Wrap(_mm_cmplt_epi16), -                    THelperCMPLT<i16>, int16x8_t>(); -} - -void TSSEEmulTest::Test_mm_cmplt_epi32() { -    Test_mm_dualcmp<i32, 4, Wrap(_mm_cmplt_epi32), -                    THelperCMPLT<i32>, int32x4_t>(); -} - -template <typename TElem, int elemCount, +    }  +}  +  +void TSSEEmulTest::Test_mm_or_si128() {  +    struct THelper {  +        static ui64 Call(const ui64 op1, const ui64 op2) {  +            return op1 | op2;  +        }  +    };  +  +    Test_mm_dualop<ui64, 2, Wrap(_mm_or_si128), THelper, uint64x2_t>();  +}  +  +void TSSEEmulTest::Test_mm_and_si128() {  +    struct THelper {  +        static ui64 Call(const ui64 op1, const ui64 op2) {  +            return op1 & op2;  +        }  +    };  +  +    Test_mm_dualop<ui64, 2, Wrap(_mm_and_si128), THelper, uint64x2_t>();  +}  +  +void TSSEEmulTest::Test_mm_andnot_si128() {  +    struct THelper {  +        static ui64 Call(const ui64 op1, const ui64 op2) {  +            return (~op1) & op2;  +        }  +    };  +  +    Test_mm_dualop<ui64, 2, Wrap(_mm_andnot_si128), THelper, uint64x2_t>();  +}  +  +template <typename TElem>  +struct THelperCMPEQ {  +    static TElem Call(const TElem op1, const TElem op2) {  +        return op1 == op2 ? 
~TElem(0) : TElem(0);  +    }  +};  +  +void TSSEEmulTest::Test_mm_cmpeq_epi8() {  +    Test_mm_dualcmp<ui8, 16, Wrap(_mm_cmpeq_epi8),  +                    THelperCMPEQ<ui8>, uint8x16_t>();  +}  +  +void TSSEEmulTest::Test_mm_cmpeq_epi16() {  +    Test_mm_dualcmp<ui16, 8, Wrap(_mm_cmpeq_epi16),  +                    THelperCMPEQ<ui16>, uint16x8_t>();  +}  +  +void TSSEEmulTest::Test_mm_cmpeq_epi32() {  +    Test_mm_dualcmp<ui32, 4, Wrap(_mm_cmpeq_epi32),  +                    THelperCMPEQ<ui32>, uint32x4_t>();  +}  +  +void TSSEEmulTest::Test_mm_cmpeq_ps() {  +    struct THelperFloat {  +        static float Call(const float op1, const float op2) {  +            union Cast {  +                unsigned int AsUInt;  +                float AsFloat;  +            };  +            Cast value;  +            value.AsUInt = op1 == op2 ? 0xFFFFFFFF : 0;  +            return value.AsFloat;  +        }  +    };  +  +    Test_mm_dualcmp<float, 4, WrapF(_mm_cmpeq_ps),  +                    THelperFloat, float32x4_t, __m128>();  +}  +  +template <typename TElem>  +struct THelperCMPGT {  +    static TElem Call(const TElem op1, const TElem op2) {  +        return op1 > op2 ? 
~TElem(0) : TElem(0);  +    }  +};  +  +void TSSEEmulTest::Test_mm_cmpgt_epi8() {  +    Test_mm_dualcmp<i8, 16, Wrap(_mm_cmpgt_epi8),  +                    THelperCMPGT<i8>, int8x16_t>();  +}  +  +void TSSEEmulTest::Test_mm_cmpgt_epi16() {  +    Test_mm_dualcmp<i16, 8, Wrap(_mm_cmpgt_epi16),  +                    THelperCMPGT<i16>, int16x8_t>();  +}  +  +void TSSEEmulTest::Test_mm_cmpgt_epi32() {  +    Test_mm_dualcmp<i32, 4, Wrap(_mm_cmpgt_epi32),  +                    THelperCMPGT<i32>, int32x4_t>();  +}  +  +void TSSEEmulTest::Test_mm_cmpgt_ps() {  +    struct THelperFloat {  +        static float Call(const float op1, const float op2) {  +            union Cast {  +                unsigned int AsUInt;  +                float AsFloat;  +            };  +            Cast value;  +            value.AsUInt = op1 > op2 ? 0xFFFFFFFF : 0;  +            return value.AsFloat;  +        }  +    };  +  +    Test_mm_dualcmp<float, 4, WrapF(_mm_cmpgt_ps),  +                    THelperFloat, float32x4_t, __m128>();  +}  +  +template <typename TElem>  +struct THelperCMPLT {  +    static TElem Call(const TElem op1, const TElem op2) {  +        return op1 < op2 ? 
~TElem(0) : TElem(0);  +    }  +};  +  +void TSSEEmulTest::Test_mm_cmplt_epi8() {  +    Test_mm_dualcmp<i8, 16, Wrap(_mm_cmplt_epi8),  +                    THelperCMPLT<i8>, int8x16_t>();  +}  +  +void TSSEEmulTest::Test_mm_cmplt_epi16() {  +    Test_mm_dualcmp<i16, 8, Wrap(_mm_cmplt_epi16),  +                    THelperCMPLT<i16>, int16x8_t>();  +}  +  +void TSSEEmulTest::Test_mm_cmplt_epi32() {  +    Test_mm_dualcmp<i32, 4, Wrap(_mm_cmplt_epi32),  +                    THelperCMPLT<i32>, int32x4_t>();  +}  +  +template <typename TElem, int elemCount,             typename TFunc, typename TOp, typename TVectorType> -void TSSEEmulTest::Test_mm_setter_epiXX() { -    char data[64] = { -        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', -        '\x33', '\x99', '\x44', '\x00', '\x55', '\x77', '\x66', '\x1C', -        '\x99', '\x33', '\x1C', '\x55', '\x88', '\x66', '\x77', '\x44', -        '\x00', '\xAA', '\xAA', '\x11', '\xCC', '\xBB', '\x22', '\xFF', -        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x00', '\x00', '\x00', -        '\x33', '\x99', '\x44', '\x88', '\xCC', '\xBB', '\x66', '\x1C', -        '\x99', '\x33', '\xFF', '\xCC', '\x88', '\x66', '\x77', '\x44', -        '\x33', '\x99', '\x44', '\x88', '\xCC', '\xBB', '\x22', '\xFF'}; -    TElem* dataw = reinterpret_cast<TElem*>(&data); - -    for (unsigned dataItem = 0; dataItem < elemCount * 4; ++dataItem) { +void TSSEEmulTest::Test_mm_setter_epiXX() {  +    char data[64] = {  +        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA',  +        '\x33', '\x99', '\x44', '\x00', '\x55', '\x77', '\x66', '\x1C',  +        '\x99', '\x33', '\x1C', '\x55', '\x88', '\x66', '\x77', '\x44',  +        '\x00', '\xAA', '\xAA', '\x11', '\xCC', '\xBB', '\x22', '\xFF',  +        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x00', '\x00', '\x00',  +        '\x33', '\x99', '\x44', '\x88', '\xCC', '\xBB', '\x66', '\x1C',  +        '\x99', '\x33', '\xFF', '\xCC', '\x88', '\x66', '\x77', '\x44',  +        
'\x33', '\x99', '\x44', '\x88', '\xCC', '\xBB', '\x22', '\xFF'};  +    TElem* dataw = reinterpret_cast<TElem*>(&data);  +  +    for (unsigned dataItem = 0; dataItem < elemCount * 4; ++dataItem) {           TVectorType value = TFunc(dataw[dataItem]); - -        for (unsigned i = 0; i < elemCount; ++i) -            UNIT_ASSERT_EQUAL(dataw[dataItem], TQType<TOp>::As(value)[i]); -    } -} - -void TSSEEmulTest::Test_mm_set1_epi8() { -    Test_mm_setter_epiXX<i8, 16, Wrap(_mm_set1_epi8), int8x16_t, __m128i>(); -} -void TSSEEmulTest::Test_mm_set1_epi16() { -    Test_mm_setter_epiXX<i16, 8, Wrap(_mm_set1_epi16), int16x8_t, __m128i>(); -} -void TSSEEmulTest::Test_mm_set1_epi32() { -    Test_mm_setter_epiXX<i32, 4, Wrap(_mm_set1_epi32), int32x4_t, __m128i>(); -} -void TSSEEmulTest::Test_mm_set1_ps() { -    Test_mm_setter_epiXX<float, 4, WrapF(_mm_set1_ps), float32x4_t, __m128>(); -} - +  +        for (unsigned i = 0; i < elemCount; ++i)  +            UNIT_ASSERT_EQUAL(dataw[dataItem], TQType<TOp>::As(value)[i]);  +    }  +}  +  +void TSSEEmulTest::Test_mm_set1_epi8() {  +    Test_mm_setter_epiXX<i8, 16, Wrap(_mm_set1_epi8), int8x16_t, __m128i>();  +}  +void TSSEEmulTest::Test_mm_set1_epi16() {  +    Test_mm_setter_epiXX<i16, 8, Wrap(_mm_set1_epi16), int16x8_t, __m128i>();  +}  +void TSSEEmulTest::Test_mm_set1_epi32() {  +    Test_mm_setter_epiXX<i32, 4, Wrap(_mm_set1_epi32), int32x4_t, __m128i>();  +}  +void TSSEEmulTest::Test_mm_set1_ps() {  +    Test_mm_setter_epiXX<float, 4, WrapF(_mm_set1_ps), float32x4_t, __m128>();  +}  +   void TSSEEmulTest::Test_mm_set_ps1() {      Test_mm_setter_epiXX<float, 4, WrapF(_mm_set_ps1), float32x4_t, __m128>();  } -void TSSEEmulTest::Test_mm_setzero_si128() { -    __m128i value = _mm_setzero_si128(); -    for (unsigned i = 0; i < 4; ++i) -        UNIT_ASSERT_EQUAL(0, TQType<uint32x4_t>::As(value)[i]); -} - -void TSSEEmulTest::Test_mm_setzero_ps() { -    __m128 value = _mm_setzero_ps(); -    for (unsigned i = 0; i < 4; ++i) -        
UNIT_ASSERT_EQUAL(0.0, TQType<float32x4_t>::As(value)[i]); -} - +void TSSEEmulTest::Test_mm_setzero_si128() {  +    __m128i value = _mm_setzero_si128();  +    for (unsigned i = 0; i < 4; ++i)  +        UNIT_ASSERT_EQUAL(0, TQType<uint32x4_t>::As(value)[i]);  +}  +  +void TSSEEmulTest::Test_mm_setzero_ps() {  +    __m128 value = _mm_setzero_ps();  +    for (unsigned i = 0; i < 4; ++i)  +        UNIT_ASSERT_EQUAL(0.0, TQType<float32x4_t>::As(value)[i]);  +}  +   void TSSEEmulTest::Test_mm_setzero_pd() {      __m128d value = _mm_setzero_pd();      for (unsigned i = 0; i < 2; ++i)          UNIT_ASSERT_EQUAL(0.0, TQType<float64x2_t>::As(value)[i]);  } -void TSSEEmulTest::Test_mm_loadl_epi64() { -    char data[64] = { -        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', -        '\x33', '\x99', '\x44', '\x00', '\x55', '\x77', '\x66', '\x1C', -        '\x99', '\x33', '\x1C', '\x55', '\x88', '\x66', '\x77', '\x44', -        '\x00', '\xAA', '\xAA', '\x11', '\xCC', '\xBB', '\x22', '\xFF', -        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x00', '\x00', '\x00', -        '\x33', '\x99', '\x44', '\x88', '\xCC', '\xBB', '\x66', '\x1C', -        '\x99', '\x33', '\xFF', '\xCC', '\x88', '\x66', '\x77', '\x44', -        '\x33', '\x99', '\x44', '\x88', '\xCC', '\xBB', '\x22', '\xFF'}; -    ui64* dataw = reinterpret_cast<ui64*>(&data); - -    for (unsigned dataItem = 0; dataItem < 8; ++dataItem) { -        __m128i value = _mm_loadl_epi64((__m128i const*)&dataw[dataItem]); - -        UNIT_ASSERT_EQUAL(dataw[dataItem], TQType<uint64x2_t>::As(value)[0]); -        UNIT_ASSERT_EQUAL(0, TQType<uint64x2_t>::As(value)[1]); -    } -} - -void TSSEEmulTest::Test_mm_storel_epi64() { -    char data[64] = { -        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', -        '\x33', '\x99', '\x44', '\x00', '\x55', '\x77', '\x66', '\x1C', -        '\x99', '\x33', '\x1C', '\x55', '\x88', '\x66', '\x77', '\x44', -        '\x00', '\xAA', '\xAA', '\x11', '\xCC', '\xBB', 
'\x22', '\xFF', -        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x00', '\x00', '\x00', -        '\x33', '\x99', '\x44', '\x88', '\xCC', '\xBB', '\x66', '\x1C', -        '\x99', '\x33', '\xFF', '\xCC', '\x88', '\x66', '\x77', '\x44', -        '\x33', '\x99', '\x44', '\x88', '\xCC', '\xBB', '\x22', '\xFF'}; -    ui64* dataw = reinterpret_cast<ui64*>(&data); - -    for (unsigned dataItem = 0; dataItem < 4; ++dataItem) { -        __m128i value = _mm_loadu_si128((__m128i*)&dataw[dataItem * 2]); - -        ui64 buf[2] = {55, 81}; -        _mm_storel_epi64((__m128i*)&buf, value); - -        UNIT_ASSERT_EQUAL(dataw[dataItem * 2], buf[0]); -        UNIT_ASSERT_EQUAL(81, buf[1]); -    } -} - -void TSSEEmulTest::Test_mm_shuffle_epi32() { -    char data[16] = { -        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', -        '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; -    ui32* dataw = reinterpret_cast<ui32*>(&data); -    __m128i value = _mm_loadu_si128((__m128i*)&data); - -    int coding[4] = {1, 3, 0, 2}; -    __m128i result = _mm_shuffle_epi32(value, _MM_SHUFFLE(2, 0, 3, 1)); - -    for (unsigned i = 0; i < 4; ++i) -        UNIT_ASSERT_EQUAL(dataw[coding[i]], -                          TQType<uint32x4_t>::As(result)[i]); -} - -static int GetHighBitAt(char data, int at) { -    ui8 udata = data & 0x80; -    return int(udata >> 7) << at; -} - -void TSSEEmulTest::Test_mm_movemask_epi8() { -    char data[16] = { -        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', -        '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; -    __m128i value = _mm_loadu_si128((__m128i*)&data); - -    int result = _mm_movemask_epi8(value); -    int verify = 0; -    for (unsigned i = 0; i < 16; ++i) { -        verify |= GetHighBitAt(data[i], i); -    } - -    UNIT_ASSERT_EQUAL(result, verify); -} - -void TSSEEmulTest::Test_mm_movemask_ps() { -    char data[16] = { -        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', 
'\xBB', '\xAA', -        '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; -    __m128 value = _mm_loadu_ps((float*)&data); - -    int result = _mm_movemask_ps(value); -    int verify = 0; -    for (unsigned i = 0; i < 4; ++i) { -        verify |= GetHighBitAt(data[i * 4 + 3], i); -    } - -    UNIT_ASSERT_EQUAL(result, verify); -} - +void TSSEEmulTest::Test_mm_loadl_epi64() {  +    char data[64] = {  +        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA',  +        '\x33', '\x99', '\x44', '\x00', '\x55', '\x77', '\x66', '\x1C',  +        '\x99', '\x33', '\x1C', '\x55', '\x88', '\x66', '\x77', '\x44',  +        '\x00', '\xAA', '\xAA', '\x11', '\xCC', '\xBB', '\x22', '\xFF',  +        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x00', '\x00', '\x00',  +        '\x33', '\x99', '\x44', '\x88', '\xCC', '\xBB', '\x66', '\x1C',  +        '\x99', '\x33', '\xFF', '\xCC', '\x88', '\x66', '\x77', '\x44',  +        '\x33', '\x99', '\x44', '\x88', '\xCC', '\xBB', '\x22', '\xFF'};  +    ui64* dataw = reinterpret_cast<ui64*>(&data);  +  +    for (unsigned dataItem = 0; dataItem < 8; ++dataItem) {  +        __m128i value = _mm_loadl_epi64((__m128i const*)&dataw[dataItem]);  +  +        UNIT_ASSERT_EQUAL(dataw[dataItem], TQType<uint64x2_t>::As(value)[0]);  +        UNIT_ASSERT_EQUAL(0, TQType<uint64x2_t>::As(value)[1]);  +    }  +}  +  +void TSSEEmulTest::Test_mm_storel_epi64() {  +    char data[64] = {  +        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA',  +        '\x33', '\x99', '\x44', '\x00', '\x55', '\x77', '\x66', '\x1C',  +        '\x99', '\x33', '\x1C', '\x55', '\x88', '\x66', '\x77', '\x44',  +        '\x00', '\xAA', '\xAA', '\x11', '\xCC', '\xBB', '\x22', '\xFF',  +        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x00', '\x00', '\x00',  +        '\x33', '\x99', '\x44', '\x88', '\xCC', '\xBB', '\x66', '\x1C',  +        '\x99', '\x33', '\xFF', '\xCC', '\x88', '\x66', '\x77', '\x44',  +        '\x33', '\x99', '\x44', '\x88', 
'\xCC', '\xBB', '\x22', '\xFF'};  +    ui64* dataw = reinterpret_cast<ui64*>(&data);  +  +    for (unsigned dataItem = 0; dataItem < 4; ++dataItem) {  +        __m128i value = _mm_loadu_si128((__m128i*)&dataw[dataItem * 2]);  +  +        ui64 buf[2] = {55, 81};  +        _mm_storel_epi64((__m128i*)&buf, value);  +  +        UNIT_ASSERT_EQUAL(dataw[dataItem * 2], buf[0]);  +        UNIT_ASSERT_EQUAL(81, buf[1]);  +    }  +}  +  +void TSSEEmulTest::Test_mm_shuffle_epi32() {  +    char data[16] = {  +        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA',  +        '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'};  +    ui32* dataw = reinterpret_cast<ui32*>(&data);  +    __m128i value = _mm_loadu_si128((__m128i*)&data);  +  +    int coding[4] = {1, 3, 0, 2};  +    __m128i result = _mm_shuffle_epi32(value, _MM_SHUFFLE(2, 0, 3, 1));  +  +    for (unsigned i = 0; i < 4; ++i)  +        UNIT_ASSERT_EQUAL(dataw[coding[i]],  +                          TQType<uint32x4_t>::As(result)[i]);  +}  +  +static int GetHighBitAt(char data, int at) {  +    ui8 udata = data & 0x80;  +    return int(udata >> 7) << at;  +}  +  +void TSSEEmulTest::Test_mm_movemask_epi8() {  +    char data[16] = {  +        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA',  +        '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'};  +    __m128i value = _mm_loadu_si128((__m128i*)&data);  +  +    int result = _mm_movemask_epi8(value);  +    int verify = 0;  +    for (unsigned i = 0; i < 16; ++i) {  +        verify |= GetHighBitAt(data[i], i);  +    }  +  +    UNIT_ASSERT_EQUAL(result, verify);  +}  +  +void TSSEEmulTest::Test_mm_movemask_ps() {  +    char data[16] = {  +        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA',  +        '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'};  +    __m128 value = _mm_loadu_ps((float*)&data);  +  +    int result = _mm_movemask_ps(value);  +    int verify = 0;  +    for (unsigned 
i = 0; i < 4; ++i) {  +        verify |= GetHighBitAt(data[i * 4 + 3], i);  +    }  +  +    UNIT_ASSERT_EQUAL(result, verify);  +}  +   void TSSEEmulTest::Test_mm_movemask_ps_2() {      char data[16] = {          '\xFF', '\xFF', '\xFF', '\xFF', '\xFF', '\xFF', '\xFF', '\xFF', @@ -1414,19 +1414,19 @@ void TSSEEmulTest::Test_mm_movemask_ps_2() {      UNIT_ASSERT_EQUAL(result, 0xf);  } -void TSSEEmulTest::Test_mm_cvtsi128_si32() { -    char data[16] = { -        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', -        '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; -    __m128i value = _mm_loadu_si128((__m128i*)&data); - -    int result = _mm_cvtsi128_si32(value); -    i32* datap = reinterpret_cast<i32*>(&data); -    int verify = datap[0]; - -    UNIT_ASSERT_EQUAL(result, verify); -} - +void TSSEEmulTest::Test_mm_cvtsi128_si32() {  +    char data[16] = {  +        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA',  +        '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'};  +    __m128i value = _mm_loadu_si128((__m128i*)&data);  +  +    int result = _mm_cvtsi128_si32(value);  +    i32* datap = reinterpret_cast<i32*>(&data);  +    int verify = datap[0];  +  +    UNIT_ASSERT_EQUAL(result, verify);  +}  +   void TSSEEmulTest::Test_mm_cvtsi128_si64() {      char data[16] = {          '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', @@ -1440,52 +1440,52 @@ void TSSEEmulTest::Test_mm_cvtsi128_si64() {      UNIT_ASSERT_EQUAL(result, verify);  } -void TSSEEmulTest::Test_mm_set_epi16() { -    char data[16] = { -        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', -        '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; -    i16* dataw = reinterpret_cast<i16*>(&data); -    ui64* dataq = reinterpret_cast<ui64*>(&data); - -    __m128i result = _mm_set_epi16(dataw[7], dataw[6], dataw[5], dataw[4], -                                   dataw[3], dataw[2], dataw[1], 
dataw[0]); -    ui64 buf[2] = {53, 81}; -    _mm_storeu_si128((__m128i*)&buf, result); - -    UNIT_ASSERT_EQUAL(buf[0], dataq[0]); -    UNIT_ASSERT_EQUAL(buf[1], dataq[1]); -} - -void TSSEEmulTest::Test_mm_set_epi32() { -    char data[16] = { -        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', -        '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; -    i32* dataw = reinterpret_cast<i32*>(&data); -    ui64* dataq = reinterpret_cast<ui64*>(&data); - -    __m128i result = _mm_set_epi32(dataw[3], dataw[2], dataw[1], dataw[0]); -    ui64 buf[2] = {53, 81}; -    _mm_storeu_si128((__m128i*)&buf, result); - -    UNIT_ASSERT_EQUAL(buf[0], dataq[0]); -    UNIT_ASSERT_EQUAL(buf[1], dataq[1]); -} - -void TSSEEmulTest::Test_mm_set_ps() { -    char data[16] = { -        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', -        '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; -    float* dataw = reinterpret_cast<float*>(&data); -    ui64* dataq = reinterpret_cast<ui64*>(&data); - -    __m128 result = _mm_set_ps(dataw[3], dataw[2], dataw[1], dataw[0]); -    ui64 buf[2] = {53, 81}; -    _mm_storeu_ps((float*)&buf, result); - -    UNIT_ASSERT_EQUAL(buf[0], dataq[0]); -    UNIT_ASSERT_EQUAL(buf[1], dataq[1]); -} - +void TSSEEmulTest::Test_mm_set_epi16() {  +    char data[16] = {  +        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA',  +        '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'};  +    i16* dataw = reinterpret_cast<i16*>(&data);  +    ui64* dataq = reinterpret_cast<ui64*>(&data);  +  +    __m128i result = _mm_set_epi16(dataw[7], dataw[6], dataw[5], dataw[4],  +                                   dataw[3], dataw[2], dataw[1], dataw[0]);  +    ui64 buf[2] = {53, 81};  +    _mm_storeu_si128((__m128i*)&buf, result);  +  +    UNIT_ASSERT_EQUAL(buf[0], dataq[0]);  +    UNIT_ASSERT_EQUAL(buf[1], dataq[1]);  +}  +  +void TSSEEmulTest::Test_mm_set_epi32() {  +    char data[16] = 
{  +        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA',  +        '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'};  +    i32* dataw = reinterpret_cast<i32*>(&data);  +    ui64* dataq = reinterpret_cast<ui64*>(&data);  +  +    __m128i result = _mm_set_epi32(dataw[3], dataw[2], dataw[1], dataw[0]);  +    ui64 buf[2] = {53, 81};  +    _mm_storeu_si128((__m128i*)&buf, result);  +  +    UNIT_ASSERT_EQUAL(buf[0], dataq[0]);  +    UNIT_ASSERT_EQUAL(buf[1], dataq[1]);  +}  +  +void TSSEEmulTest::Test_mm_set_ps() {  +    char data[16] = {  +        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA',  +        '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'};  +    float* dataw = reinterpret_cast<float*>(&data);  +    ui64* dataq = reinterpret_cast<ui64*>(&data);  +  +    __m128 result = _mm_set_ps(dataw[3], dataw[2], dataw[1], dataw[0]);  +    ui64 buf[2] = {53, 81};  +    _mm_storeu_ps((float*)&buf, result);  +  +    UNIT_ASSERT_EQUAL(buf[0], dataq[0]);  +    UNIT_ASSERT_EQUAL(buf[1], dataq[1]);  +}  +   void TSSEEmulTest::Test_mm_set_pd() {      char data[16] = {          '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', @@ -1501,22 +1501,22 @@ void TSSEEmulTest::Test_mm_set_pd() {      UNIT_ASSERT_EQUAL(buf[1], dataq[1]);  } -void TSSEEmulTest::Test_mm_cvtsi32_si128() { -    char data[16] = { -        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', -        '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; -    i32* dataw = reinterpret_cast<i32*>(&data); - -    __m128i result = _mm_cvtsi32_si128(dataw[0]); -    i32 buf[4] = {53, 81, -43, 2132}; -    _mm_storeu_si128((__m128i*)&buf, result); - -    UNIT_ASSERT_EQUAL(buf[0], dataw[0]); -    UNIT_ASSERT_EQUAL(buf[1], 0); -    UNIT_ASSERT_EQUAL(buf[2], 0); -    UNIT_ASSERT_EQUAL(buf[3], 0); -} - +void TSSEEmulTest::Test_mm_cvtsi32_si128() {  +    char data[16] = {  +        '\xAA', '\x00', '\xFF', '\xCC', '\x11', 
'\x22', '\xBB', '\xAA',  +        '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'};  +    i32* dataw = reinterpret_cast<i32*>(&data);  +  +    __m128i result = _mm_cvtsi32_si128(dataw[0]);  +    i32 buf[4] = {53, 81, -43, 2132};  +    _mm_storeu_si128((__m128i*)&buf, result);  +  +    UNIT_ASSERT_EQUAL(buf[0], dataw[0]);  +    UNIT_ASSERT_EQUAL(buf[1], 0);  +    UNIT_ASSERT_EQUAL(buf[2], 0);  +    UNIT_ASSERT_EQUAL(buf[3], 0);  +}  +   void TSSEEmulTest::Test_mm_cvtsi64_si128() {      char data[16] = {          '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', @@ -1531,44 +1531,44 @@ void TSSEEmulTest::Test_mm_cvtsi64_si128() {      UNIT_ASSERT_EQUAL(buf[1], 0);  } -template <typename TElem, typename TNarrow, unsigned elemCount, typename TFunc> -void TSSEEmulTest::Test_mm_packs_epiXX() { -    char data[32] = { -        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', -        '\x33', '\x99', '\x44', '\x88', '\x55', '\x00', '\x66', '\x1C', -        '\x99', '\x33', '\x1C', '\x55', '\x00', '\x00', '\x00', '\x00', -        '\x00', '\xAA', '\x00', '\x00', '\xCC', '\xBB', '\x22', '\xFF'}; -    __m128i value0 = _mm_loadu_si128((__m128i*)&data); -    __m128i value1 = _mm_loadu_si128(((__m128i*)&data) + 1); -    TElem* dataw = reinterpret_cast<TElem*>(&data); - -    __m128i result = TFunc(value0, value1); - -    TNarrow verify[elemCount]; -    for (unsigned i = 0; i < elemCount; ++i) { -        TElem sum = dataw[i]; -        if (sum > std::numeric_limits<TNarrow>::max()) -            sum = std::numeric_limits<TNarrow>::max(); -        if (sum < std::numeric_limits<TNarrow>::min()) -            sum = std::numeric_limits<TNarrow>::min(); -        verify[i] = TNarrow(sum); -    } - -    ui64* verifyp = (ui64*)&verify; -    UNIT_ASSERT_EQUAL(verifyp[0], TQType<uint64x2_t>::As(result)[0]); -    UNIT_ASSERT_EQUAL(verifyp[1], TQType<uint64x2_t>::As(result)[1]); -} - -void TSSEEmulTest::Test_mm_packs_epi16() { -    
Test_mm_packs_epiXX<i16, i8, 16, Wrap(_mm_packs_epi16)>(); -} -void TSSEEmulTest::Test_mm_packs_epi32() { -    Test_mm_packs_epiXX<i32, i16, 8, Wrap(_mm_packs_epi32)>(); -} -void TSSEEmulTest::Test_mm_packus_epi16() { -    Test_mm_packs_epiXX<i16, ui8, 16, Wrap(_mm_packus_epi16)>(); -} - +template <typename TElem, typename TNarrow, unsigned elemCount, typename TFunc>  +void TSSEEmulTest::Test_mm_packs_epiXX() {  +    char data[32] = {  +        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA',  +        '\x33', '\x99', '\x44', '\x88', '\x55', '\x00', '\x66', '\x1C',  +        '\x99', '\x33', '\x1C', '\x55', '\x00', '\x00', '\x00', '\x00',  +        '\x00', '\xAA', '\x00', '\x00', '\xCC', '\xBB', '\x22', '\xFF'};  +    __m128i value0 = _mm_loadu_si128((__m128i*)&data);  +    __m128i value1 = _mm_loadu_si128(((__m128i*)&data) + 1);  +    TElem* dataw = reinterpret_cast<TElem*>(&data);  +  +    __m128i result = TFunc(value0, value1);  +  +    TNarrow verify[elemCount];  +    for (unsigned i = 0; i < elemCount; ++i) {  +        TElem sum = dataw[i];  +        if (sum > std::numeric_limits<TNarrow>::max())  +            sum = std::numeric_limits<TNarrow>::max();  +        if (sum < std::numeric_limits<TNarrow>::min())  +            sum = std::numeric_limits<TNarrow>::min();  +        verify[i] = TNarrow(sum);  +    }  +  +    ui64* verifyp = (ui64*)&verify;  +    UNIT_ASSERT_EQUAL(verifyp[0], TQType<uint64x2_t>::As(result)[0]);  +    UNIT_ASSERT_EQUAL(verifyp[1], TQType<uint64x2_t>::As(result)[1]);  +}  +  +void TSSEEmulTest::Test_mm_packs_epi16() {  +    Test_mm_packs_epiXX<i16, i8, 16, Wrap(_mm_packs_epi16)>();  +}  +void TSSEEmulTest::Test_mm_packs_epi32() {  +    Test_mm_packs_epiXX<i32, i16, 8, Wrap(_mm_packs_epi32)>();  +}  +void TSSEEmulTest::Test_mm_packus_epi16() {  +    Test_mm_packs_epiXX<i16, ui8, 16, Wrap(_mm_packus_epi16)>();  +}  +   void TSSEEmulTest::Test_mm_extract_epi8() {      alignas(16) char data[16] = {          '\xAA', '\x00', 
'\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', @@ -1594,23 +1594,23 @@ void TSSEEmulTest::Test_mm_extract_epi8() {      UNIT_ASSERT_EQUAL((_mm_extract_epi8(value, 15)), int(dataw[15]));  } -void TSSEEmulTest::Test_mm_extract_epi16() { +void TSSEEmulTest::Test_mm_extract_epi16() {       alignas(16) char data[16] = { -        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', -        '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; +        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA',  +        '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'};       const ui16* dataw = reinterpret_cast<const ui16*>(&data);      const __m128i value = _mm_loadu_si128((__m128i*)&data); - -    UNIT_ASSERT_EQUAL((_mm_extract_epi16(value, 0)), int(dataw[0])); -    UNIT_ASSERT_EQUAL((_mm_extract_epi16(value, 1)), int(dataw[1])); -    UNIT_ASSERT_EQUAL((_mm_extract_epi16(value, 2)), int(dataw[2])); -    UNIT_ASSERT_EQUAL((_mm_extract_epi16(value, 3)), int(dataw[3])); -    UNIT_ASSERT_EQUAL((_mm_extract_epi16(value, 4)), int(dataw[4])); -    UNIT_ASSERT_EQUAL((_mm_extract_epi16(value, 5)), int(dataw[5])); -    UNIT_ASSERT_EQUAL((_mm_extract_epi16(value, 6)), int(dataw[6])); -    UNIT_ASSERT_EQUAL((_mm_extract_epi16(value, 7)), int(dataw[7])); -} - +  +    UNIT_ASSERT_EQUAL((_mm_extract_epi16(value, 0)), int(dataw[0]));  +    UNIT_ASSERT_EQUAL((_mm_extract_epi16(value, 1)), int(dataw[1]));  +    UNIT_ASSERT_EQUAL((_mm_extract_epi16(value, 2)), int(dataw[2]));  +    UNIT_ASSERT_EQUAL((_mm_extract_epi16(value, 3)), int(dataw[3]));  +    UNIT_ASSERT_EQUAL((_mm_extract_epi16(value, 4)), int(dataw[4]));  +    UNIT_ASSERT_EQUAL((_mm_extract_epi16(value, 5)), int(dataw[5]));  +    UNIT_ASSERT_EQUAL((_mm_extract_epi16(value, 6)), int(dataw[6]));  +    UNIT_ASSERT_EQUAL((_mm_extract_epi16(value, 7)), int(dataw[7]));  +}  +   void TSSEEmulTest::Test_mm_extract_epi64() {      alignas(16) char data[16] = {          '\xAA', '\x00', '\xFF', 
'\xCC', '\x11', '\x22', '\xBB', '\xAA', @@ -1635,160 +1635,160 @@ void TSSEEmulTest::Test_mm_extract_epi32() {      UNIT_ASSERT_EQUAL((_mm_extract_epi32(value, 3)), int(dataw[3]));  } -void TSSEEmulTest::Test_MM_TRANSPOSE4_PS() { -    char data0[16] = { -        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', -        '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; -    char data1[16] = { -        '\x99', '\x33', '\x1C', '\x55', '\x88', '\x66', '\x77', '\x44', -        '\x00', '\xAA', '\xAA', '\x11', '\xCC', '\xBB', '\x22', '\xFF'}; -    char data2[16] = { -        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', -        '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; -    char data3[16] = { -        '\x99', '\x33', '\x1C', '\x55', '\x88', '\x66', '\x77', '\x44', -        '\x00', '\xAA', '\xAA', '\x11', '\xCC', '\xBB', '\x22', '\xFF'}; - -    __m128 value0 = _mm_loadu_ps((float*)&data0); -    __m128 value1 = _mm_loadu_ps((float*)&data1); -    __m128 value2 = _mm_loadu_ps((float*)&data2); -    __m128 value3 = _mm_loadu_ps((float*)&data3); - -    _MM_TRANSPOSE4_PS(value0, value1, value2, value3); - -    ui64 tbuf0[2] = {0, 0}; -    ui64 tbuf1[2] = {0, 0}; -    ui64 tbuf2[2] = {0, 0}; -    ui64 tbuf3[2] = {0, 0}; - -    _mm_storeu_ps((float*)&tbuf0, value0); -    _mm_storeu_ps((float*)&tbuf1, value1); -    _mm_storeu_ps((float*)&tbuf2, value2); -    _mm_storeu_ps((float*)&tbuf3, value3); - -    char tdata0[16] = { -        '\xAA', '\x00', '\xFF', '\xCC', '\x99', '\x33', '\x1C', '\x55', -        '\xAA', '\x00', '\xFF', '\xCC', '\x99', '\x33', '\x1C', '\x55'}; -    char tdata1[16] = { -        '\x11', '\x22', '\xBB', '\xAA', '\x88', '\x66', '\x77', '\x44', -        '\x11', '\x22', '\xBB', '\xAA', '\x88', '\x66', '\x77', '\x44'}; -    char tdata2[16] = { -        '\x33', '\x99', '\x44', '\x88', '\x00', '\xAA', '\xAA', '\x11', -        '\x33', '\x99', '\x44', '\x88', '\x00', '\xAA', '\xAA', '\x11'}; -    
char tdata3[16] = { -        '\x55', '\x77', '\x66', '\x1C', '\xCC', '\xBB', '\x22', '\xFF', -        '\x55', '\x77', '\x66', '\x1C', '\xCC', '\xBB', '\x22', '\xFF'}; - -    UNIT_ASSERT(memcmp(tbuf0, tdata0, 16) == 0); -    UNIT_ASSERT(memcmp(tbuf1, tdata1, 16) == 0); -    UNIT_ASSERT(memcmp(tbuf2, tdata2, 16) == 0); -    UNIT_ASSERT(memcmp(tbuf3, tdata3, 16) == 0); -} - -template <typename TFrom, typename TTo, unsigned elemCount, -          typename TLoadVector, typename TResultVector, -          typename TElemFunc, typename TFunc, typename TOp> -void TSSEEmulTest::Test_mm_convertop() { -    char data[16] = { -        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', -        '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; -    TFrom* datap = reinterpret_cast<TFrom*>(&data); - -    TLoadVector value = TFuncLoad<TLoadVector>(&data); - -    TTo procData[elemCount]; -    for (unsigned i = 0; i < elemCount; ++i) { -        procData[i] = TElemFunc::Call(datap[i]); -    } - -    TResultVector result = TFunc(value); - -    for (unsigned i = 0; i < elemCount; ++i) { -        UNIT_ASSERT_EQUAL(procData[i], TQType<TOp>::As(result)[i]); -    } -} - -void TSSEEmulTest::Test_mm_cvtepi32_ps() { -    struct THelper { -        static float Call(const i32 op) { -            return float(op); -        } -    }; -    Test_mm_convertop<i32, float, 4, __m128i, __m128, -                      THelper, WrapF(_mm_cvtepi32_ps), float32x4_t>(); -}; - -void TSSEEmulTest::Test_mm_cvtps_epi32() { -    struct THelper { -        static i32 Call(const float op) { -            return i32(op); -        } -    }; -    Test_mm_convertop<float, i32, 4, __m128, __m128i, +void TSSEEmulTest::Test_MM_TRANSPOSE4_PS() {  +    char data0[16] = {  +        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA',  +        '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'};  +    char data1[16] = {  +        '\x99', '\x33', '\x1C', '\x55', '\x88', '\x66', 
'\x77', '\x44',  +        '\x00', '\xAA', '\xAA', '\x11', '\xCC', '\xBB', '\x22', '\xFF'};  +    char data2[16] = {  +        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA',  +        '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'};  +    char data3[16] = {  +        '\x99', '\x33', '\x1C', '\x55', '\x88', '\x66', '\x77', '\x44',  +        '\x00', '\xAA', '\xAA', '\x11', '\xCC', '\xBB', '\x22', '\xFF'};  +  +    __m128 value0 = _mm_loadu_ps((float*)&data0);  +    __m128 value1 = _mm_loadu_ps((float*)&data1);  +    __m128 value2 = _mm_loadu_ps((float*)&data2);  +    __m128 value3 = _mm_loadu_ps((float*)&data3);  +  +    _MM_TRANSPOSE4_PS(value0, value1, value2, value3);  +  +    ui64 tbuf0[2] = {0, 0};  +    ui64 tbuf1[2] = {0, 0};  +    ui64 tbuf2[2] = {0, 0};  +    ui64 tbuf3[2] = {0, 0};  +  +    _mm_storeu_ps((float*)&tbuf0, value0);  +    _mm_storeu_ps((float*)&tbuf1, value1);  +    _mm_storeu_ps((float*)&tbuf2, value2);  +    _mm_storeu_ps((float*)&tbuf3, value3);  +  +    char tdata0[16] = {  +        '\xAA', '\x00', '\xFF', '\xCC', '\x99', '\x33', '\x1C', '\x55',  +        '\xAA', '\x00', '\xFF', '\xCC', '\x99', '\x33', '\x1C', '\x55'};  +    char tdata1[16] = {  +        '\x11', '\x22', '\xBB', '\xAA', '\x88', '\x66', '\x77', '\x44',  +        '\x11', '\x22', '\xBB', '\xAA', '\x88', '\x66', '\x77', '\x44'};  +    char tdata2[16] = {  +        '\x33', '\x99', '\x44', '\x88', '\x00', '\xAA', '\xAA', '\x11',  +        '\x33', '\x99', '\x44', '\x88', '\x00', '\xAA', '\xAA', '\x11'};  +    char tdata3[16] = {  +        '\x55', '\x77', '\x66', '\x1C', '\xCC', '\xBB', '\x22', '\xFF',  +        '\x55', '\x77', '\x66', '\x1C', '\xCC', '\xBB', '\x22', '\xFF'};  +  +    UNIT_ASSERT(memcmp(tbuf0, tdata0, 16) == 0);  +    UNIT_ASSERT(memcmp(tbuf1, tdata1, 16) == 0);  +    UNIT_ASSERT(memcmp(tbuf2, tdata2, 16) == 0);  +    UNIT_ASSERT(memcmp(tbuf3, tdata3, 16) == 0);  +}  +  +template <typename TFrom, typename TTo, unsigned elemCount,  +   
       typename TLoadVector, typename TResultVector,  +          typename TElemFunc, typename TFunc, typename TOp>  +void TSSEEmulTest::Test_mm_convertop() {  +    char data[16] = {  +        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA',  +        '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'};  +    TFrom* datap = reinterpret_cast<TFrom*>(&data);  +  +    TLoadVector value = TFuncLoad<TLoadVector>(&data);  +  +    TTo procData[elemCount];  +    for (unsigned i = 0; i < elemCount; ++i) {  +        procData[i] = TElemFunc::Call(datap[i]);  +    }  +  +    TResultVector result = TFunc(value);  +  +    for (unsigned i = 0; i < elemCount; ++i) {  +        UNIT_ASSERT_EQUAL(procData[i], TQType<TOp>::As(result)[i]);  +    }  +}  +  +void TSSEEmulTest::Test_mm_cvtepi32_ps() {  +    struct THelper {  +        static float Call(const i32 op) {  +            return float(op);  +        }  +    };  +    Test_mm_convertop<i32, float, 4, __m128i, __m128,  +                      THelper, WrapF(_mm_cvtepi32_ps), float32x4_t>();  +};  +  +void TSSEEmulTest::Test_mm_cvtps_epi32() {  +    struct THelper {  +        static i32 Call(const float op) {  +            return i32(op);  +        }  +    };  +    Test_mm_convertop<float, i32, 4, __m128, __m128i,                         THelper, T_mm_CallWrapper<__m128i, decltype(_mm_cvtps_epi32), _mm_cvtps_epi32>, int32x4_t>(); -}; - -void TSSEEmulTest::Test_mm_cvttps_epi32() { -    struct THelper { -        static i32 Call(const float op) { -            return i32(op); -        } -    }; -    Test_mm_convertop<float, i32, 4, __m128, __m128i, -                      THelper, Wrap(_mm_cvttps_epi32), int32x4_t>(); -}; - -template <typename TLoadVector, typename TCastVector, -          typename TFunc, TFunc* func> -void TSSEEmulTest::Test_mm_castXX() { -    char data[16] = { -        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', -        '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', 
'\x1C'}; - -    TLoadVector value = TFuncLoad<TLoadVector>(&data); -    const TLoadVector constvalue = TFuncLoad<TLoadVector>(&data); -    TCastVector casted = func(value); -    const TCastVector constcasted = func(constvalue); -    char verify[16]; -    char constverify[16]; -    TFuncStore<TCastVector>(&verify, casted); -    TFuncStore<TCastVector>(&constverify, constcasted); - -    UNIT_ASSERT(memcmp(&data, &verify, 16) == 0); -    UNIT_ASSERT(memcmp(&data, &constverify, 16) == 0); -}; - -void TSSEEmulTest::Test_mm_castsi128_ps() { -    Test_mm_castXX<__m128i, __m128, -                   decltype(_mm_castsi128_ps), _mm_castsi128_ps>(); -} - -void TSSEEmulTest::Test_mm_castps_si128() { -    Test_mm_castXX<__m128, __m128i, -                   decltype(_mm_castps_si128), _mm_castps_si128>(); -} - -void TSSEEmulTest::Test_mm_mul_epu32() { -    char data0[16] = { -        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA', -        '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'}; -    char data1[16] = { -        '\x99', '\x33', '\x1C', '\x55', '\x88', '\x66', '\x77', '\x44', -        '\x00', '\xAA', '\xAA', '\x11', '\xCC', '\xBB', '\x22', '\xFF'}; -    ui32* dataw0 = reinterpret_cast<ui32*>(&data0); -    ui32* dataw1 = reinterpret_cast<ui32*>(&data1); - -    __m128i value0 = _mm_loadu_si128((__m128i*)&data0); -    __m128i value1 = _mm_loadu_si128((__m128i*)&data1); - +};  +  +void TSSEEmulTest::Test_mm_cvttps_epi32() {  +    struct THelper {  +        static i32 Call(const float op) {  +            return i32(op);  +        }  +    };  +    Test_mm_convertop<float, i32, 4, __m128, __m128i,  +                      THelper, Wrap(_mm_cvttps_epi32), int32x4_t>();  +};  +  +template <typename TLoadVector, typename TCastVector,  +          typename TFunc, TFunc* func>  +void TSSEEmulTest::Test_mm_castXX() {  +    char data[16] = {  +        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA',  +        '\x33', '\x99', '\x44', '\x88', 
'\x55', '\x77', '\x66', '\x1C'};  +  +    TLoadVector value = TFuncLoad<TLoadVector>(&data);  +    const TLoadVector constvalue = TFuncLoad<TLoadVector>(&data);  +    TCastVector casted = func(value);  +    const TCastVector constcasted = func(constvalue);  +    char verify[16];  +    char constverify[16];  +    TFuncStore<TCastVector>(&verify, casted);  +    TFuncStore<TCastVector>(&constverify, constcasted);  +  +    UNIT_ASSERT(memcmp(&data, &verify, 16) == 0);  +    UNIT_ASSERT(memcmp(&data, &constverify, 16) == 0);  +};  +  +void TSSEEmulTest::Test_mm_castsi128_ps() {  +    Test_mm_castXX<__m128i, __m128,  +                   decltype(_mm_castsi128_ps), _mm_castsi128_ps>();  +}  +  +void TSSEEmulTest::Test_mm_castps_si128() {  +    Test_mm_castXX<__m128, __m128i,  +                   decltype(_mm_castps_si128), _mm_castps_si128>();  +}  +  +void TSSEEmulTest::Test_mm_mul_epu32() {  +    char data0[16] = {  +        '\xAA', '\x00', '\xFF', '\xCC', '\x11', '\x22', '\xBB', '\xAA',  +        '\x33', '\x99', '\x44', '\x88', '\x55', '\x77', '\x66', '\x1C'};  +    char data1[16] = {  +        '\x99', '\x33', '\x1C', '\x55', '\x88', '\x66', '\x77', '\x44',  +        '\x00', '\xAA', '\xAA', '\x11', '\xCC', '\xBB', '\x22', '\xFF'};  +    ui32* dataw0 = reinterpret_cast<ui32*>(&data0);  +    ui32* dataw1 = reinterpret_cast<ui32*>(&data1);  +  +    __m128i value0 = _mm_loadu_si128((__m128i*)&data0);  +    __m128i value1 = _mm_loadu_si128((__m128i*)&data1);  +       ui64 mul0 = (ui64) dataw0[0] * (ui64) dataw1[0];      ui64 mul1 = (ui64) dataw0[2] * (ui64) dataw1[2]; - -    __m128i result = _mm_mul_epu32(value0, value1); - -    UNIT_ASSERT_EQUAL(mul0, TQType<uint64x2_t>::As(result)[0]); -    UNIT_ASSERT_EQUAL(mul1, TQType<uint64x2_t>::As(result)[1]); -} +  +    __m128i result = _mm_mul_epu32(value0, value1);  +  +    UNIT_ASSERT_EQUAL(mul0, TQType<uint64x2_t>::As(result)[0]);  +    UNIT_ASSERT_EQUAL(mul1, TQType<uint64x2_t>::As(result)[1]);  +}   void 
TSSEEmulTest::Test_mm_cmpunord_ps() {      alignas(16) float valuesBits[4] = {1.f, 2.f, 3.f, 4.f}; diff --git a/library/cpp/sse/ut/ya.make b/library/cpp/sse/ut/ya.make index 45e104971ee..14cac6727a4 100644 --- a/library/cpp/sse/ut/ya.make +++ b/library/cpp/sse/ut/ya.make @@ -1,13 +1,13 @@  UNITTEST_FOR(library/cpp/sse) - +   OWNER(danlark) - -SRCS( +  +SRCS(       test.cpp -) - +)  +   IF (ARCH_X86_64)      CFLAGS(-msse4.1 -msse4.2)  ENDIF() -END() +END()  diff --git a/library/cpp/testing/unittest/registar.h b/library/cpp/testing/unittest/registar.h index 44517a00924..28256b53f2f 100644 --- a/library/cpp/testing/unittest/registar.h +++ b/library/cpp/testing/unittest/registar.h @@ -279,8 +279,8 @@ private:                                                   \      }                                                      \                                                             \      virtual void Execute() override {                      \ -        this->AtStart(); - +        this->AtStart();  +   #ifndef UT_SKIP_EXCEPTIONS  #define CATCH_REACTION(FN, e, context) this->AddError(("(" + TypeName(e) + ") " + e.what()).data(), context)  #define CATCH_REACTION_BT(FN, e, context) this->AddError(("(" + TypeName(e) + ") " + e.what()).data(), (e.BackTrace() ? 
e.BackTrace()->PrintToString() : TString()), context) diff --git a/library/cpp/testing/unittest/utmain.cpp b/library/cpp/testing/unittest/utmain.cpp index 305bc6b40fc..cec11773ed1 100644 --- a/library/cpp/testing/unittest/utmain.cpp +++ b/library/cpp/testing/unittest/utmain.cpp @@ -207,7 +207,7 @@ public:          } else {              TString suite = TString(name).substr(0, colon);              EnabledSuites_.insert(suite); -            EnabledSuites_.insert(name); +            EnabledSuites_.insert(name);               EnabledTests_.insert(name);              EnabledTests_.insert(TString() + name + "::*");          } diff --git a/library/cpp/threading/light_rw_lock/bench/lightrwlock_test.cpp b/library/cpp/threading/light_rw_lock/bench/lightrwlock_test.cpp index c3027ea5449..5e217c25ade 100644 --- a/library/cpp/threading/light_rw_lock/bench/lightrwlock_test.cpp +++ b/library/cpp/threading/light_rw_lock/bench/lightrwlock_test.cpp @@ -1,188 +1,188 @@  #include <library/cpp/threading/light_rw_lock/lightrwlock.h> -#include <util/random/random.h> - -#ifdef _linux_ -// Light rw lock is implemented only for linux - -using namespace NS_LightRWLock; - -#include <pthread.h> -#include <stdlib.h> -#include <stdio.h> - -#define LIGHT - -#ifdef RWSPINLOCK +#include <util/random/random.h>  +  +#ifdef _linux_  +// Light rw lock is implemented only for linux  +  +using namespace NS_LightRWLock;  +  +#include <pthread.h>  +#include <stdlib.h>  +#include <stdio.h>  +  +#define LIGHT  +  +#ifdef RWSPINLOCK   #include <library/cpp/lwtrace/rwspinlock.h> -#endif - -#define CHECK_LOGIC 1 -#define LOOPCOUNT 1000000 -#define RANRCOUNT 100 -#define THREADCOUNT 40 -#define WRITELOCKS 100 - -#if defined(_MSC_VER) -static int Y_FORCE_INLINE AtomicFetchAdd(volatile int& item, int value) { -    return _InterlockedExchangeAdd((&item, value); -} -#elif defined(__GNUC__) -#else +#endif  +  +#define CHECK_LOGIC 1  +#define LOOPCOUNT 1000000  +#define RANRCOUNT 100  +#define THREADCOUNT 40  +#define 
WRITELOCKS 100  +  +#if defined(_MSC_VER)  +static int Y_FORCE_INLINE AtomicFetchAdd(volatile int& item, int value) {  +    return _InterlockedExchangeAdd((&item, value);  +}  +#elif defined(__GNUC__)  +#else   #error unsupported platform -#endif - -class TPosixRWLock { -public: -    TPosixRWLock() { -    } - -    ~TPosixRWLock() { -        pthread_rwlock_destroy(&rwlock); -    } - -    TPosixRWLock(const TPosixRWLock&) = delete; -    void operator=(const TPosixRWLock&) = delete; - -private: -    pthread_rwlock_t rwlock = PTHREAD_RWLOCK_INITIALIZER; -    friend class TPosixRWShareLocker; -    friend class TPosixRWExclusiveLocker; -}; - -#if defined(LIGHT) -TLightRWLock __attribute__((aligned(64))) rwlock; -#elif defined(POSIX) -TPosixRWLock rwlock; -#elif defined(RWSPINLOCK) -TRWSpinLock __attribute__((aligned(64))) rwlock; -#else -#error "define lock type" -#endif - -volatile __attribute__((aligned(64))) int checkIt = 0; -volatile int checkExcl = 0; - -class TPosixRWShareLocker { -public: -    TPosixRWShareLocker(TPosixRWLock& lock) -        : LockP_(&lock) -    { -        pthread_rwlock_rdlock(&LockP_->rwlock); -    } - -    ~TPosixRWShareLocker() { -        pthread_rwlock_unlock(&LockP_->rwlock); -    } - -    TPosixRWShareLocker(const TPosixRWShareLocker&) = delete; -    void operator=(const TPosixRWShareLocker&) = delete; - -private: -    TPosixRWLock* LockP_; -}; - -class TPosixRWExclusiveLocker { -public: -    TPosixRWExclusiveLocker(TPosixRWLock& lock) -        : LockP_(&lock) -    { -        pthread_rwlock_wrlock(&LockP_->rwlock); -    } - -    ~TPosixRWExclusiveLocker() { -        pthread_rwlock_unlock(&LockP_->rwlock); -    } -    TPosixRWExclusiveLocker(const TPosixRWExclusiveLocker&) = delete; -    void operator=(const TPosixRWExclusiveLocker&) = delete; - -private: -    TPosixRWLock* LockP_; -}; - -template <typename TLocker, bool excl> -static Y_FORCE_INLINE void Run() { -    TLocker lockIt(rwlock); - -#if defined(CHECK_LOGIC) && CHECK_LOGIC -    if 
(!excl && checkExcl == 1) { -        printf("there is a bug\n"); -    } - -    int result = AtomicFetchAdd(checkIt, 1); -    if (excl) -        checkExcl = 1; - -    if (excl && result > 1) -        printf("there is a bug\n"); -#endif - -    for (unsigned w = 0; w < RANRCOUNT; ++w) -        RandomNumber<ui32>(); - -#if defined(CHECK_LOGIC) && CHECK_LOGIC -    if (excl) -        checkExcl = 0; - -    AtomicFetchAdd(checkIt, -1); -#endif -} - -#ifdef LIGHT -static void* fast_thread_start(__attribute__((unused)) void* arg) { -    for (unsigned q = 0; q < LOOPCOUNT; ++q) { -        char excl = (RandomNumber<ui32>() % WRITELOCKS) == 0; -        if (excl) -            Run<TLightWriteGuard, 1>(); -        else -            Run<TLightReadGuard, 0>(); -    } -    return NULL; -} -#endif - -#ifdef POSIX -static void* fast_thread_start(__attribute__((unused)) void* arg) { -    for (unsigned q = 0; q < LOOPCOUNT; ++q) { -        char excl = (RandomNumber<ui32>() % WRITELOCKS) == 0; -        if (excl) -            Run<TPosixRWExclusiveLocker, 1>(); -        else -            Run<TPosixRWShareLocker, 0>(); -    } -    return NULL; -} -#endif - -#ifdef RWSPINLOCK -static void* fast_thread_start(__attribute__((unused)) void* arg) { -    for (unsigned q = 0; q < LOOPCOUNT; ++q) { -        char excl = (RandomNumber<ui32>() % WRITELOCKS) == 0; -        if (excl) -            Run<TWriteSpinLockGuard, 1>(); -        else -            Run<TReadSpinLockGuard, 0>(); -    } -    return NULL; -} -#endif - -int main() { -    pthread_t threads[THREADCOUNT]; - -    for (unsigned q = 0; q < THREADCOUNT; ++q) { -        pthread_create(&(threads[q]), NULL, &fast_thread_start, NULL); -    } - -    for (unsigned q = 0; q < THREADCOUNT; ++q) -        pthread_join(threads[q], NULL); - -    return 0; -} - -#else // !_linux_ - -int main() { -    return 0; -} - -#endif +#endif  +  +class TPosixRWLock {  +public:  +    TPosixRWLock() {  +    }  +  +    ~TPosixRWLock() {  +        
pthread_rwlock_destroy(&rwlock);  +    }  +  +    TPosixRWLock(const TPosixRWLock&) = delete;  +    void operator=(const TPosixRWLock&) = delete;  +  +private:  +    pthread_rwlock_t rwlock = PTHREAD_RWLOCK_INITIALIZER;  +    friend class TPosixRWShareLocker;  +    friend class TPosixRWExclusiveLocker;  +};  +  +#if defined(LIGHT)  +TLightRWLock __attribute__((aligned(64))) rwlock;  +#elif defined(POSIX)  +TPosixRWLock rwlock;  +#elif defined(RWSPINLOCK)  +TRWSpinLock __attribute__((aligned(64))) rwlock;  +#else  +#error "define lock type"  +#endif  +  +volatile __attribute__((aligned(64))) int checkIt = 0;  +volatile int checkExcl = 0;  +  +class TPosixRWShareLocker {  +public:  +    TPosixRWShareLocker(TPosixRWLock& lock)  +        : LockP_(&lock)  +    {  +        pthread_rwlock_rdlock(&LockP_->rwlock);  +    }  +  +    ~TPosixRWShareLocker() {  +        pthread_rwlock_unlock(&LockP_->rwlock);  +    }  +  +    TPosixRWShareLocker(const TPosixRWShareLocker&) = delete;  +    void operator=(const TPosixRWShareLocker&) = delete;  +  +private:  +    TPosixRWLock* LockP_;  +};  +  +class TPosixRWExclusiveLocker {  +public:  +    TPosixRWExclusiveLocker(TPosixRWLock& lock)  +        : LockP_(&lock)  +    {  +        pthread_rwlock_wrlock(&LockP_->rwlock);  +    }  +  +    ~TPosixRWExclusiveLocker() {  +        pthread_rwlock_unlock(&LockP_->rwlock);  +    }  +    TPosixRWExclusiveLocker(const TPosixRWExclusiveLocker&) = delete;  +    void operator=(const TPosixRWExclusiveLocker&) = delete;  +  +private:  +    TPosixRWLock* LockP_;  +};  +  +template <typename TLocker, bool excl>  +static Y_FORCE_INLINE void Run() {  +    TLocker lockIt(rwlock);  +  +#if defined(CHECK_LOGIC) && CHECK_LOGIC  +    if (!excl && checkExcl == 1) {  +        printf("there is a bug\n");  +    }  +  +    int result = AtomicFetchAdd(checkIt, 1);  +    if (excl)  +        checkExcl = 1;  +  +    if (excl && result > 1)  +        printf("there is a bug\n");  +#endif  +  +    for (unsigned w = 0; w 
< RANRCOUNT; ++w)  +        RandomNumber<ui32>();  +  +#if defined(CHECK_LOGIC) && CHECK_LOGIC  +    if (excl)  +        checkExcl = 0;  +  +    AtomicFetchAdd(checkIt, -1);  +#endif  +}  +  +#ifdef LIGHT  +static void* fast_thread_start(__attribute__((unused)) void* arg) {  +    for (unsigned q = 0; q < LOOPCOUNT; ++q) {  +        char excl = (RandomNumber<ui32>() % WRITELOCKS) == 0;  +        if (excl)  +            Run<TLightWriteGuard, 1>();  +        else  +            Run<TLightReadGuard, 0>();  +    }  +    return NULL;  +}  +#endif  +  +#ifdef POSIX  +static void* fast_thread_start(__attribute__((unused)) void* arg) {  +    for (unsigned q = 0; q < LOOPCOUNT; ++q) {  +        char excl = (RandomNumber<ui32>() % WRITELOCKS) == 0;  +        if (excl)  +            Run<TPosixRWExclusiveLocker, 1>();  +        else  +            Run<TPosixRWShareLocker, 0>();  +    }  +    return NULL;  +}  +#endif  +  +#ifdef RWSPINLOCK  +static void* fast_thread_start(__attribute__((unused)) void* arg) {  +    for (unsigned q = 0; q < LOOPCOUNT; ++q) {  +        char excl = (RandomNumber<ui32>() % WRITELOCKS) == 0;  +        if (excl)  +            Run<TWriteSpinLockGuard, 1>();  +        else  +            Run<TReadSpinLockGuard, 0>();  +    }  +    return NULL;  +}  +#endif  +  +int main() {  +    pthread_t threads[THREADCOUNT];  +  +    for (unsigned q = 0; q < THREADCOUNT; ++q) {  +        pthread_create(&(threads[q]), NULL, &fast_thread_start, NULL);  +    }  +  +    for (unsigned q = 0; q < THREADCOUNT; ++q)  +        pthread_join(threads[q], NULL);  +  +    return 0;  +}  +  +#else // !_linux_  +  +int main() {  +    return 0;  +}  +  +#endif  diff --git a/library/cpp/threading/light_rw_lock/bench/ya.make b/library/cpp/threading/light_rw_lock/bench/ya.make index 7969b52a501..ed89e3a9b0a 100644 --- a/library/cpp/threading/light_rw_lock/bench/ya.make +++ b/library/cpp/threading/light_rw_lock/bench/ya.make @@ -1,13 +1,13 @@ -PROGRAM(lightrwlock_test) - -OWNER(agri) - 
-SRCS( -    lightrwlock_test.cpp -) - -PEERDIR( +PROGRAM(lightrwlock_test)  +  +OWNER(agri)  +  +SRCS(  +    lightrwlock_test.cpp  +)  +  +PEERDIR(       library/cpp/threading/light_rw_lock -) - -END() +)  +  +END()  diff --git a/library/cpp/threading/light_rw_lock/lightrwlock.cpp b/library/cpp/threading/light_rw_lock/lightrwlock.cpp index fbb63fd47f7..58456907d2b 100644 --- a/library/cpp/threading/light_rw_lock/lightrwlock.cpp +++ b/library/cpp/threading/light_rw_lock/lightrwlock.cpp @@ -1,113 +1,113 @@ -#include "lightrwlock.h" -#include <util/system/spinlock.h> - -#if defined(_linux_) - -using namespace NS_LightRWLock; - -void TLightRWLock::WaitForUntrappedShared() { -    for (;;) { -        for (ui32 i = 0; i < SpinCount_; ++i) { -            SpinLockPause(); - -            if ((AtomicLoad(Counter_) & 0x7FFFFFFF) == 0) -                return; -        } - -        SequenceStore(UnshareFutex_, 1); -        if ((AtomicLoad(Counter_) & 0x7FFFFFFF) == 0) { -            AtomicStore(UnshareFutex_, 0); -            return; -        } -        FutexWait(UnshareFutex_, 1); -    } -} - -void TLightRWLock::WaitForExclusiveAndUntrappedShared() { -    for (;;) { -        for (ui32 i = 0; i < SpinCount_; ++i) { -            SpinLockPause(); - -            if (AtomicLoad(Counter_) >= 0) -                goto try_to_get_lock; -            if (AtomicLoad(TrappedFutex_) == 1) -                goto skip_store_trapped; -        } - -        SequenceStore(TrappedFutex_, 1); -    skip_store_trapped: - -        if (AtomicLoad(Counter_) < 0) { -            FutexWait(TrappedFutex_, 1); -        } - -    try_to_get_lock: -        if (!AtomicSetBit(Counter_, 31)) -            break; -    } - -    for (ui32 j = 0;; ++j) { -        for (ui32 i = 0; i < SpinCount_; ++i) { -            if ((AtomicLoad(Counter_) & 0x7FFFFFFF) == 0) -                return; - -            SpinLockPause(); -        } - -        SequenceStore(UnshareFutex_, 1); - -        if ((AtomicLoad(Counter_) & 0x7FFFFFFF) 
== 0) { -            AtomicStore(UnshareFutex_, 0); -            return; -        } - -        FutexWait(UnshareFutex_, 1); -    } -} - -void TLightRWLock::WaitForUntrappedAndAcquireRead() { -    if (AtomicFetchAdd(Counter_, -1) < 0) -        goto skip_lock_try; - -    for (;;) { -    again: -        if (Y_UNLIKELY(AtomicFetchAdd(Counter_, 1) >= 0)) { -            return; -        } else { -            if (AtomicFetchAdd(Counter_, -1) >= 0) -                goto again; -        } - -    skip_lock_try: +#include "lightrwlock.h"  +#include <util/system/spinlock.h>  +  +#if defined(_linux_)  +  +using namespace NS_LightRWLock;  +  +void TLightRWLock::WaitForUntrappedShared() {  +    for (;;) {  +        for (ui32 i = 0; i < SpinCount_; ++i) {  +            SpinLockPause();  +  +            if ((AtomicLoad(Counter_) & 0x7FFFFFFF) == 0)  +                return;  +        }  +  +        SequenceStore(UnshareFutex_, 1);  +        if ((AtomicLoad(Counter_) & 0x7FFFFFFF) == 0) {  +            AtomicStore(UnshareFutex_, 0);  +            return;  +        }  +        FutexWait(UnshareFutex_, 1);  +    }  +}  +  +void TLightRWLock::WaitForExclusiveAndUntrappedShared() {  +    for (;;) {  +        for (ui32 i = 0; i < SpinCount_; ++i) {  +            SpinLockPause();  +  +            if (AtomicLoad(Counter_) >= 0)  +                goto try_to_get_lock;  +            if (AtomicLoad(TrappedFutex_) == 1)  +                goto skip_store_trapped;  +        }  +  +        SequenceStore(TrappedFutex_, 1);  +    skip_store_trapped:  +  +        if (AtomicLoad(Counter_) < 0) {  +            FutexWait(TrappedFutex_, 1);  +        }  +  +    try_to_get_lock:  +        if (!AtomicSetBit(Counter_, 31))  +            break;  +    }  +  +    for (ui32 j = 0;; ++j) {  +        for (ui32 i = 0; i < SpinCount_; ++i) {  +            if ((AtomicLoad(Counter_) & 0x7FFFFFFF) == 0)  +                return;  +  +            SpinLockPause();  +        }  +  +        SequenceStore(UnshareFutex_, 
1);  +  +        if ((AtomicLoad(Counter_) & 0x7FFFFFFF) == 0) {  +            AtomicStore(UnshareFutex_, 0);  +            return;  +        }  +  +        FutexWait(UnshareFutex_, 1);  +    }  +}  +  +void TLightRWLock::WaitForUntrappedAndAcquireRead() {  +    if (AtomicFetchAdd(Counter_, -1) < 0)  +        goto skip_lock_try;  +  +    for (;;) {  +    again:  +        if (Y_UNLIKELY(AtomicFetchAdd(Counter_, 1) >= 0)) {  +            return;  +        } else {  +            if (AtomicFetchAdd(Counter_, -1) >= 0)  +                goto again;  +        }  +  +    skip_lock_try:           if (AtomicLoad(UnshareFutex_) && (AtomicLoad(Counter_) & 0x7FFFFFFF) == 0) { -            SequenceStore(UnshareFutex_, 0); -            FutexWake(UnshareFutex_, 1); -        } - -        for (;;) { -            for (ui32 i = 0; i < SpinCount_; ++i) { -                SpinLockPause(); - -                if (AtomicLoad(Counter_) >= 0) -                    goto again; -                if (AtomicLoad(TrappedFutex_) == 1) -                    goto skip_store_trapped; -            } - -            SequenceStore(TrappedFutex_, 1); -        skip_store_trapped: - -            if (AtomicLoad(Counter_) < 0) { -                FutexWait(TrappedFutex_, 1); -                if (AtomicLoad(Counter_) < 0) -                    goto again; -            } else if (AtomicLoad(TrappedFutex_)) { -                SequenceStore(TrappedFutex_, 0); -                FutexWake(TrappedFutex_, 0x7fffffff); -            } -            break; -        } -    } -} - -#endif // _linux_ +            SequenceStore(UnshareFutex_, 0);  +            FutexWake(UnshareFutex_, 1);  +        }  +  +        for (;;) {  +            for (ui32 i = 0; i < SpinCount_; ++i) {  +                SpinLockPause();  +  +                if (AtomicLoad(Counter_) >= 0)  +                    goto again;  +                if (AtomicLoad(TrappedFutex_) == 1)  +                    goto skip_store_trapped;  +            }  +  +            
SequenceStore(TrappedFutex_, 1);  +        skip_store_trapped:  +  +            if (AtomicLoad(Counter_) < 0) {  +                FutexWait(TrappedFutex_, 1);  +                if (AtomicLoad(Counter_) < 0)  +                    goto again;  +            } else if (AtomicLoad(TrappedFutex_)) {  +                SequenceStore(TrappedFutex_, 0);  +                FutexWake(TrappedFutex_, 0x7fffffff);  +            }  +            break;  +        }  +    }  +}  +  +#endif // _linux_  diff --git a/library/cpp/threading/light_rw_lock/lightrwlock.h b/library/cpp/threading/light_rw_lock/lightrwlock.h index 931a1817bce..44117871695 100644 --- a/library/cpp/threading/light_rw_lock/lightrwlock.h +++ b/library/cpp/threading/light_rw_lock/lightrwlock.h @@ -1,45 +1,45 @@ -#pragma once +#pragma once  -#include <util/system/rwlock.h> +#include <util/system/rwlock.h>   #include <util/system/sanitizers.h> - -#if defined(_linux_) -/* TLightRWLock is optimized for read lock and very fast lock/unlock switching. -   Read lock increments counter. -   Write lock sets highest bit of counter (makes counter negative). - -   Whenever a thread tries to acquire read lock that thread increments -   the counter. If the thread gets negative value of the counter right just -   after the increment that means write lock was acquired in another thread. -   In that case the thread decrements the counter back, wakes one thread on -   UnshareFutex, waits on the TrappedFutex and then tries acquire read lock -   from the beginning. -   If the thread gets positive value of the counter after the increment -   then read lock was successfully acquired and -   the thread can proceed execution. - -   Whenever a thread tries to acquire write lock that thread set the highest bit -   of the counter. If the thread determine that the bit was set previously then -   write lock was acquired in another thread. In that case the thread waits on -   the TrappedFutex and then tries again from the beginning. 
-   If the highest bit was successfully set then thread check if any read lock -   exists at the moment. If so the thread waits on UnshareFutex. If there is -   no more read locks then write lock was successfully acquired and the thread -   can proceed execution. -*/ - -#include <linux/futex.h> +  +#if defined(_linux_)  +/* TLightRWLock is optimized for read lock and very fast lock/unlock switching.  +   Read lock increments counter.  +   Write lock sets highest bit of counter (makes counter negative).  +  +   Whenever a thread tries to acquire read lock that thread increments  +   the counter. If the thread gets negative value of the counter right just  +   after the increment that means write lock was acquired in another thread.  +   In that case the thread decrements the counter back, wakes one thread on  +   UnshareFutex, waits on the TrappedFutex and then tries acquire read lock  +   from the beginning.  +   If the thread gets positive value of the counter after the increment  +   then read lock was successfully acquired and  +   the thread can proceed execution.  +  +   Whenever a thread tries to acquire write lock that thread set the highest bit  +   of the counter. If the thread determine that the bit was set previously then  +   write lock was acquired in another thread. In that case the thread waits on  +   the TrappedFutex and then tries again from the beginning.  +   If the highest bit was successfully set then thread check if any read lock  +   exists at the moment. If so the thread waits on UnshareFutex. If there is  +   no more read locks then write lock was successfully acquired and the thread  +   can proceed execution.  
+*/  +  +#include <linux/futex.h>   #include <unistd.h> -#include <sys/syscall.h> -#include <errno.h> - -namespace NS_LightRWLock { +#include <sys/syscall.h>  +#include <errno.h>  +  +namespace NS_LightRWLock {       static int Y_FORCE_INLINE AtomicFetchAdd(volatile int& item, int value) {          return __atomic_fetch_add(&item, value, __ATOMIC_SEQ_CST);      } - -#if defined(_x86_64_) || defined(_i386_) - +  +#if defined(_x86_64_) || defined(_i386_)  +       static char Y_FORCE_INLINE AtomicSetBit(volatile int& item, unsigned bit) {          char ret;          __asm__ __volatile__( @@ -54,7 +54,7 @@ namespace NS_LightRWLock {          return ret;      } - +       static char Y_FORCE_INLINE AtomicClearBit(volatile int& item, unsigned bit) {          char ret;          __asm__ __volatile__( @@ -69,22 +69,22 @@ namespace NS_LightRWLock {          return ret;      } - - -#else - +  +  +#else  +       static char Y_FORCE_INLINE AtomicSetBit(volatile int& item, unsigned bit) {          int prev = __atomic_fetch_or(&item, 1 << bit, __ATOMIC_SEQ_CST);          return (prev & (1 << bit)) != 0 ? 1 : 0;      } - +       static char Y_FORCE_INLINE      AtomicClearBit(volatile int& item, unsigned bit) {          int prev = __atomic_fetch_and(&item, ~(1 << bit), __ATOMIC_SEQ_CST);          return (prev & (1 << bit)) != 0 ? 
1 : 0;      }  #endif - +   #if defined(_x86_64_) || defined(_i386_) || defined (__aarch64__) || defined (__powerpc64__)      static bool AtomicLockHighByte(volatile int& item) {          union TA { @@ -98,23 +98,23 @@ namespace NS_LightRWLock {                                             __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);      } -#endif - +#endif  +       template <typename TInt>      static void Y_FORCE_INLINE AtomicStore(volatile TInt& var, TInt value) {          __atomic_store_n(&var, value, __ATOMIC_RELEASE);      } - +       template <typename TInt>      static void Y_FORCE_INLINE SequenceStore(volatile TInt& var, TInt value) {          __atomic_store_n(&var, value, __ATOMIC_SEQ_CST);      } - +       template <typename TInt>      static TInt Y_FORCE_INLINE AtomicLoad(const volatile TInt& var) {          return __atomic_load_n(&var, __ATOMIC_ACQUIRE);      } - +       static void Y_FORCE_INLINE FutexWait(volatile int& fvar, int value) {          for (;;) {              int result = @@ -126,9 +126,9 @@ namespace NS_LightRWLock {                      continue;                  Y_FAIL("futex error");              } -        } -    } - +        }  +    }  +       static void Y_FORCE_INLINE FutexWake(volatile int& fvar, int amount) {          const int result =              syscall(SYS_futex, &fvar, FUTEX_WAKE_PRIVATE, amount, NULL, NULL, 0); @@ -136,85 +136,85 @@ namespace NS_LightRWLock {              Y_FAIL("futex error");      } -} - -class alignas(64) TLightRWLock { -public: -    TLightRWLock(ui32 spinCount = 10) -        : Counter_(0) -        , TrappedFutex_(0) -        , UnshareFutex_(0) -        , SpinCount_(spinCount) +}  +  +class alignas(64) TLightRWLock {  +public:  +    TLightRWLock(ui32 spinCount = 10)  +        : Counter_(0)  +        , TrappedFutex_(0)  +        , UnshareFutex_(0)  +        , SpinCount_(spinCount)       {      } - -    TLightRWLock(const TLightRWLock&) = delete; -    void operator=(const TLightRWLock&) = delete; - -    
Y_FORCE_INLINE void AcquireWrite() { -        using namespace NS_LightRWLock; - -        if (AtomicLockHighByte(Counter_)) { -            if ((AtomicLoad(Counter_) & 0x7FFFFFFF) == 0) -                return; -            return WaitForUntrappedShared(); -        } -        WaitForExclusiveAndUntrappedShared(); -    } - -    Y_FORCE_INLINE void AcquireRead() { -        using namespace NS_LightRWLock; - -        if (Y_LIKELY(AtomicFetchAdd(Counter_, 1) >= 0)) -            return; -        WaitForUntrappedAndAcquireRead(); -    } - -    Y_FORCE_INLINE void ReleaseWrite() { -        using namespace NS_LightRWLock; - -        AtomicClearBit(Counter_, 31); -        if (AtomicLoad(TrappedFutex_)) { -            SequenceStore(TrappedFutex_, 0); -            FutexWake(TrappedFutex_, 0x7fffffff); -        } -    } - -    Y_FORCE_INLINE void ReleaseRead() { -        using namespace NS_LightRWLock; - -        if (Y_LIKELY(AtomicFetchAdd(Counter_, -1) >= 0)) -            return; -        if (!AtomicLoad(UnshareFutex_)) -            return; -        if ((AtomicLoad(Counter_) & 0x7fffffff) == 0) { -            SequenceStore(UnshareFutex_, 0); -            FutexWake(UnshareFutex_, 1); -        } -    } - -private: -    volatile int Counter_; -    volatile int TrappedFutex_; -    volatile int UnshareFutex_; -    const ui32 SpinCount_; - -    void WaitForUntrappedShared(); -    void WaitForExclusiveAndUntrappedShared(); -    void WaitForUntrappedAndAcquireRead(); -}; - -#else - -class TLightRWLock: public TRWMutex { -public: +  +    TLightRWLock(const TLightRWLock&) = delete;  +    void operator=(const TLightRWLock&) = delete;  +  +    Y_FORCE_INLINE void AcquireWrite() {  +        using namespace NS_LightRWLock;  +  +        if (AtomicLockHighByte(Counter_)) {  +            if ((AtomicLoad(Counter_) & 0x7FFFFFFF) == 0)  +                return;  +            return WaitForUntrappedShared();  +        }  +        WaitForExclusiveAndUntrappedShared();  +    }  +  +    Y_FORCE_INLINE 
void AcquireRead() {  +        using namespace NS_LightRWLock;  +  +        if (Y_LIKELY(AtomicFetchAdd(Counter_, 1) >= 0))  +            return;  +        WaitForUntrappedAndAcquireRead();  +    }  +  +    Y_FORCE_INLINE void ReleaseWrite() {  +        using namespace NS_LightRWLock;  +  +        AtomicClearBit(Counter_, 31);  +        if (AtomicLoad(TrappedFutex_)) {  +            SequenceStore(TrappedFutex_, 0);  +            FutexWake(TrappedFutex_, 0x7fffffff);  +        }  +    }  +  +    Y_FORCE_INLINE void ReleaseRead() {  +        using namespace NS_LightRWLock;  +  +        if (Y_LIKELY(AtomicFetchAdd(Counter_, -1) >= 0))  +            return;  +        if (!AtomicLoad(UnshareFutex_))  +            return;  +        if ((AtomicLoad(Counter_) & 0x7fffffff) == 0) {  +            SequenceStore(UnshareFutex_, 0);  +            FutexWake(UnshareFutex_, 1);  +        }  +    }  +  +private:  +    volatile int Counter_;  +    volatile int TrappedFutex_;  +    volatile int UnshareFutex_;  +    const ui32 SpinCount_;  +  +    void WaitForUntrappedShared();  +    void WaitForExclusiveAndUntrappedShared();  +    void WaitForUntrappedAndAcquireRead();  +};  +  +#else  +  +class TLightRWLock: public TRWMutex {  +public:       TLightRWLock() {      }      TLightRWLock(ui32) {      } -}; - -#endif - -using TLightReadGuard = TReadGuardBase<TLightRWLock>; -using TLightWriteGuard = TWriteGuardBase<TLightRWLock>; +};  +  +#endif  +  +using TLightReadGuard = TReadGuardBase<TLightRWLock>;  +using TLightWriteGuard = TWriteGuardBase<TLightRWLock>;  diff --git a/library/cpp/threading/light_rw_lock/ut/rwlock_ut.cpp b/library/cpp/threading/light_rw_lock/ut/rwlock_ut.cpp index e82063d959f..3ed4bf68faa 100644 --- a/library/cpp/threading/light_rw_lock/ut/rwlock_ut.cpp +++ b/library/cpp/threading/light_rw_lock/ut/rwlock_ut.cpp @@ -1,122 +1,122 @@  #include <library/cpp/threading/light_rw_lock/lightrwlock.h>  #include <library/cpp/testing/unittest/registar.h> -#include 
<util/random/random.h> -#include <util/system/atomic.h> +#include <util/random/random.h>  +#include <util/system/atomic.h>   #include <util/thread/pool.h> - -class TRWMutexTest: public TTestBase { -    UNIT_TEST_SUITE(TRWMutexTest); -    UNIT_TEST(TestReaders) -    UNIT_TEST(TestReadersWriters) -    UNIT_TEST_SUITE_END(); - -    struct TSharedData { -        TSharedData() -            : writersIn(0) -            , readersIn(0) -            , failed(false) -        { -        } - -        TAtomic writersIn; -        TAtomic readersIn; - -        bool failed; - -        TLightRWLock mutex; -    }; - -    class TThreadTask: public IObjectInQueue { -    public: -        using PFunc = void (TThreadTask::*)(void); - -        TThreadTask(PFunc func, TSharedData& data, size_t id, size_t total) -            : Func_(func) -            , Data_(data) -            , Id_(id) -            , Total_(total) -        { -        } - -        void Process(void*) override { -            THolder<TThreadTask> This(this); - -            (this->*Func_)(); -        } - -#define FAIL_ASSERT(cond)    \ -    if (!(cond)) {           \ -        Data_.failed = true; \ -    } -        void RunReaders() { -            Data_.mutex.AcquireRead(); - -            AtomicIncrement(Data_.readersIn); -            usleep(100); -            FAIL_ASSERT(Data_.readersIn == long(Total_)); -            usleep(100); -            AtomicDecrement(Data_.readersIn); - -            Data_.mutex.ReleaseRead(); -        } - -        void RunReadersWriters() { -            if (Id_ % 2 == 0) { -                for (size_t i = 0; i < 10; ++i) { -                    Data_.mutex.AcquireRead(); - -                    AtomicIncrement(Data_.readersIn); -                    FAIL_ASSERT(Data_.writersIn == 0); -                    usleep(RandomNumber<ui32>() % 5); -                    AtomicDecrement(Data_.readersIn); - -                    Data_.mutex.ReleaseRead(); -                } -            } else { -                for 
(size_t i = 0; i < 10; ++i) { -                    Data_.mutex.AcquireWrite(); - -                    AtomicIncrement(Data_.writersIn); -                    FAIL_ASSERT(Data_.readersIn == 0 && Data_.writersIn == 1); -                    usleep(RandomNumber<ui32>() % 5); -                    AtomicDecrement(Data_.writersIn); - -                    Data_.mutex.ReleaseWrite(); -                } -            } -        } -#undef FAIL_ASSERT - -    private: -        PFunc Func_; -        TSharedData& Data_; -        size_t Id_; -        size_t Total_; -    }; - -private: -#define RUN_CYCLE(what, count)                                                     \ -    Q_.Start(count);                                                               \ -    for (size_t i = 0; i < count; ++i) {                                           \ -        UNIT_ASSERT(Q_.Add(new TThreadTask(&TThreadTask::what, Data_, i, count))); \ -    }                                                                              \ -    Q_.Stop();                                                                     \ -    bool b = Data_.failed;                                                         \ -    Data_.failed = false;                                                          \ -    UNIT_ASSERT(!b); - -    void TestReaders() { -        RUN_CYCLE(RunReaders, 1); -    } - -    void TestReadersWriters() { -        RUN_CYCLE(RunReadersWriters, 1); -    } - -#undef RUN_CYCLE -private: -    TSharedData Data_; +  +class TRWMutexTest: public TTestBase {  +    UNIT_TEST_SUITE(TRWMutexTest);  +    UNIT_TEST(TestReaders)  +    UNIT_TEST(TestReadersWriters)  +    UNIT_TEST_SUITE_END();  +  +    struct TSharedData {  +        TSharedData()  +            : writersIn(0)  +            , readersIn(0)  +            , failed(false)  +        {  +        }  +  +        TAtomic writersIn;  +        TAtomic readersIn;  +  +        bool failed;  +  +        TLightRWLock mutex;  +    };  +  +    class TThreadTask: public 
IObjectInQueue {  +    public:  +        using PFunc = void (TThreadTask::*)(void);  +  +        TThreadTask(PFunc func, TSharedData& data, size_t id, size_t total)  +            : Func_(func)  +            , Data_(data)  +            , Id_(id)  +            , Total_(total)  +        {  +        }  +  +        void Process(void*) override {  +            THolder<TThreadTask> This(this);  +  +            (this->*Func_)();  +        }  +  +#define FAIL_ASSERT(cond)    \  +    if (!(cond)) {           \  +        Data_.failed = true; \  +    }  +        void RunReaders() {  +            Data_.mutex.AcquireRead();  +  +            AtomicIncrement(Data_.readersIn);  +            usleep(100);  +            FAIL_ASSERT(Data_.readersIn == long(Total_));  +            usleep(100);  +            AtomicDecrement(Data_.readersIn);  +  +            Data_.mutex.ReleaseRead();  +        }  +  +        void RunReadersWriters() {  +            if (Id_ % 2 == 0) {  +                for (size_t i = 0; i < 10; ++i) {  +                    Data_.mutex.AcquireRead();  +  +                    AtomicIncrement(Data_.readersIn);  +                    FAIL_ASSERT(Data_.writersIn == 0);  +                    usleep(RandomNumber<ui32>() % 5);  +                    AtomicDecrement(Data_.readersIn);  +  +                    Data_.mutex.ReleaseRead();  +                }  +            } else {  +                for (size_t i = 0; i < 10; ++i) {  +                    Data_.mutex.AcquireWrite();  +  +                    AtomicIncrement(Data_.writersIn);  +                    FAIL_ASSERT(Data_.readersIn == 0 && Data_.writersIn == 1);  +                    usleep(RandomNumber<ui32>() % 5);  +                    AtomicDecrement(Data_.writersIn);  +  +                    Data_.mutex.ReleaseWrite();  +                }  +            }  +        }  +#undef FAIL_ASSERT  +  +    private:  +        PFunc Func_;  +        TSharedData& Data_;  +        size_t Id_;  +        size_t Total_;  +    };  +  
+private:  +#define RUN_CYCLE(what, count)                                                     \  +    Q_.Start(count);                                                               \  +    for (size_t i = 0; i < count; ++i) {                                           \  +        UNIT_ASSERT(Q_.Add(new TThreadTask(&TThreadTask::what, Data_, i, count))); \  +    }                                                                              \  +    Q_.Stop();                                                                     \  +    bool b = Data_.failed;                                                         \  +    Data_.failed = false;                                                          \  +    UNIT_ASSERT(!b);  +  +    void TestReaders() {  +        RUN_CYCLE(RunReaders, 1);  +    }  +  +    void TestReadersWriters() {  +        RUN_CYCLE(RunReadersWriters, 1);  +    }  +  +#undef RUN_CYCLE  +private:  +    TSharedData Data_;       TThreadPool Q_; -}; - -UNIT_TEST_SUITE_REGISTRATION(TRWMutexTest) +};  +  +UNIT_TEST_SUITE_REGISTRATION(TRWMutexTest)  diff --git a/library/cpp/threading/light_rw_lock/ut/ya.make b/library/cpp/threading/light_rw_lock/ut/ya.make index 92928b837c2..9b1a54d7ecd 100644 --- a/library/cpp/threading/light_rw_lock/ut/ya.make +++ b/library/cpp/threading/light_rw_lock/ut/ya.make @@ -1,9 +1,9 @@  UNITTEST_FOR(library/cpp/threading/light_rw_lock) - +   OWNER(agri) - -SRCS( -    rwlock_ut.cpp -) - -END() +  +SRCS(  +    rwlock_ut.cpp  +)  +  +END()  diff --git a/library/cpp/threading/light_rw_lock/ya.make b/library/cpp/threading/light_rw_lock/ya.make index a196fb85886..e86fd422238 100644 --- a/library/cpp/threading/light_rw_lock/ya.make +++ b/library/cpp/threading/light_rw_lock/ya.make @@ -1,10 +1,10 @@ -LIBRARY() - -OWNER(agri) - -SRCS( -    lightrwlock.cpp -    lightrwlock.h -) - -END() +LIBRARY()  +  +OWNER(agri)  +  +SRCS(  +    lightrwlock.cpp  +    lightrwlock.h  +)  +  +END()  diff --git a/library/cpp/threading/queue/basic_ut.cpp 
b/library/cpp/threading/queue/basic_ut.cpp index 5f56f8583ec..2db5d6e8e83 100644 --- a/library/cpp/threading/queue/basic_ut.cpp +++ b/library/cpp/threading/queue/basic_ut.cpp @@ -1,92 +1,92 @@  #include <library/cpp/testing/unittest/registar.h> -#include <util/generic/vector.h> -#include <util/system/thread.h> - -#include "ut_helpers.h" - +#include <util/generic/vector.h>  +#include <util/system/thread.h>  +  +#include "ut_helpers.h"  +   template <typename TQueueType> -class TQueueTestsInSingleThread: public TTestBase { -private: +class TQueueTestsInSingleThread: public TTestBase {  +private:       using TSelf = TQueueTestsInSingleThread<TQueueType>; -    using TLink = TIntrusiveLink; - -    UNIT_TEST_SUITE_DEMANGLE(TSelf); -    UNIT_TEST(OnePushOnePop) -    UNIT_TEST(OnePushOnePop_Repeat1M) -    UNIT_TEST(Threads8_Repeat1M_Push1Pop1) -    UNIT_TEST_SUITE_END(); - -public: -    void OnePushOnePop() { +    using TLink = TIntrusiveLink;  +  +    UNIT_TEST_SUITE_DEMANGLE(TSelf);  +    UNIT_TEST(OnePushOnePop)  +    UNIT_TEST(OnePushOnePop_Repeat1M)  +    UNIT_TEST(Threads8_Repeat1M_Push1Pop1)  +    UNIT_TEST_SUITE_END();  +  +public:  +    void OnePushOnePop() {           TQueueType queue; - -        auto popped = queue.Pop(); -        UNIT_ASSERT_VALUES_EQUAL(popped, nullptr); - -        TLink msg; -        queue.Push(&msg); -        popped = queue.Pop(); -        UNIT_ASSERT_VALUES_EQUAL(&msg, popped); - -        popped = queue.Pop(); -        UNIT_ASSERT_VALUES_EQUAL(popped, nullptr); -    }; - -    void OnePushOnePop_Repeat1M() { +  +        auto popped = queue.Pop();  +        UNIT_ASSERT_VALUES_EQUAL(popped, nullptr);  +  +        TLink msg;  +        queue.Push(&msg);  +        popped = queue.Pop();  +        UNIT_ASSERT_VALUES_EQUAL(&msg, popped);  +  +        popped = queue.Pop();  +        UNIT_ASSERT_VALUES_EQUAL(popped, nullptr);  +    };  +  +    void OnePushOnePop_Repeat1M() {           TQueueType queue; -        TLink msg; - -        auto popped = 
queue.Pop(); -        UNIT_ASSERT_VALUES_EQUAL(popped, nullptr); - -        for (int i = 0; i < 1000000; ++i) { -            queue.Push(&msg); -            popped = queue.Pop(); -            UNIT_ASSERT_VALUES_EQUAL(&msg, popped); - -            popped = queue.Pop(); -            UNIT_ASSERT_VALUES_EQUAL(popped, nullptr); -        } -    } - -    template <size_t NUMBER_OF_THREADS> -    void RepeatPush1Pop1_InManyThreads() { +        TLink msg;  +  +        auto popped = queue.Pop();  +        UNIT_ASSERT_VALUES_EQUAL(popped, nullptr);  +  +        for (int i = 0; i < 1000000; ++i) {  +            queue.Push(&msg);  +            popped = queue.Pop();  +            UNIT_ASSERT_VALUES_EQUAL(&msg, popped);  +  +            popped = queue.Pop();  +            UNIT_ASSERT_VALUES_EQUAL(popped, nullptr);  +        }  +    }  +  +    template <size_t NUMBER_OF_THREADS>  +    void RepeatPush1Pop1_InManyThreads() {           class TCycleThread: public ISimpleThread { -        public: -            void* ThreadProc() override { +        public:  +            void* ThreadProc() override {                   TQueueType queue; -                TLink msg; -                auto popped = queue.Pop(); -                UNIT_ASSERT_VALUES_EQUAL(popped, nullptr); - -                for (size_t i = 0; i < 1000000; ++i) { -                    queue.Push(&msg); -                    popped = queue.Pop(); -                    UNIT_ASSERT_VALUES_EQUAL(popped, &msg); - -                    popped = queue.Pop(); -                    UNIT_ASSERT_VALUES_EQUAL(popped, nullptr); -                } -                return nullptr; -            } -        }; - +                TLink msg;  +                auto popped = queue.Pop();  +                UNIT_ASSERT_VALUES_EQUAL(popped, nullptr);  +  +                for (size_t i = 0; i < 1000000; ++i) {  +                    queue.Push(&msg);  +                    popped = queue.Pop();  +                    UNIT_ASSERT_VALUES_EQUAL(popped, &msg);  +  +   
                 popped = queue.Pop();  +                    UNIT_ASSERT_VALUES_EQUAL(popped, nullptr);  +                }  +                return nullptr;  +            }  +        };  +           TVector<TAutoPtr<TCycleThread>> cyclers; - -        for (size_t i = 0; i < NUMBER_OF_THREADS; ++i) { -            cyclers.emplace_back(new TCycleThread); -            cyclers.back()->Start(); -        } - -        for (size_t i = 0; i < NUMBER_OF_THREADS; ++i) { -            cyclers[i]->Join(); -        } -    } - -    void Threads8_Repeat1M_Push1Pop1() { -        RepeatPush1Pop1_InManyThreads<8>(); -    } -}; - -REGISTER_TESTS_FOR_ALL_ORDERED_QUEUES(TQueueTestsInSingleThread); -REGISTER_TESTS_FOR_ALL_UNORDERED_QUEUES(TQueueTestsInSingleThread) +  +        for (size_t i = 0; i < NUMBER_OF_THREADS; ++i) {  +            cyclers.emplace_back(new TCycleThread);  +            cyclers.back()->Start();  +        }  +  +        for (size_t i = 0; i < NUMBER_OF_THREADS; ++i) {  +            cyclers[i]->Join();  +        }  +    }  +  +    void Threads8_Repeat1M_Push1Pop1() {  +        RepeatPush1Pop1_InManyThreads<8>();  +    }  +};  +  +REGISTER_TESTS_FOR_ALL_ORDERED_QUEUES(TQueueTestsInSingleThread);  +REGISTER_TESTS_FOR_ALL_UNORDERED_QUEUES(TQueueTestsInSingleThread)  diff --git a/library/cpp/threading/queue/mpmc_unordered_ring.cpp b/library/cpp/threading/queue/mpmc_unordered_ring.cpp index 160547f5946..df48182210f 100644 --- a/library/cpp/threading/queue/mpmc_unordered_ring.cpp +++ b/library/cpp/threading/queue/mpmc_unordered_ring.cpp @@ -1,74 +1,74 @@ -#include "mpmc_unordered_ring.h" - -namespace NThreading { -    TMPMCUnorderedRing::TMPMCUnorderedRing(size_t size) { -        Y_VERIFY(size > 0); -        RingSize = size; -        RingBuffer.Reset(new void*[size]); -        memset(&RingBuffer[0], 0, sizeof(void*) * size); -    } - -    bool TMPMCUnorderedRing::Push(void* msg, ui16 retryCount) noexcept { -        if (retryCount == 0) { -            StubbornPush(msg); -      
      return true; -        } -        for (ui16 itry = retryCount; itry-- > 0;) { -            if (WeakPush(msg)) { -                return true; -            } -        } -        return false; -    } - -    bool TMPMCUnorderedRing::WeakPush(void* msg) noexcept { -        auto pawl = AtomicIncrement(WritePawl); -        if (pawl - AtomicGet(ReadFront) >= RingSize) { -            // Queue is full -            AtomicDecrement(WritePawl); -            return false; -        } - -        auto writeSlot = AtomicGetAndIncrement(WriteFront); -        if (AtomicCas(&RingBuffer[writeSlot % RingSize], msg, nullptr)) { -            return true; -        } -        // slot is occupied for some reason, retry -        return false; -    } - -    void* TMPMCUnorderedRing::Pop() noexcept { -        ui64 readSlot; - -        for (ui16 itry = MAX_POP_TRIES; itry-- > 0;) { -            auto pawl = AtomicIncrement(ReadPawl); -            if (pawl > AtomicGet(WriteFront)) { -                // Queue is empty -                AtomicDecrement(ReadPawl); -                return nullptr; -            } - -            readSlot = AtomicGetAndIncrement(ReadFront); - -            auto msg = AtomicSwap(&RingBuffer[readSlot % RingSize], nullptr); -            if (msg != nullptr) { -                return msg; -            } -        } - -        /* got no message in the slot, let's try to rollback readfront */ -        AtomicCas(&ReadFront, readSlot - 1, readSlot); -        return nullptr; -    } - -    void* TMPMCUnorderedRing::UnsafeScanningPop(ui64* last) noexcept { -        for (; *last < RingSize;) { -            auto msg = AtomicSwap(&RingBuffer[*last], nullptr); -            ++*last; -            if (msg != nullptr) { -                return msg; -            } -        } -        return nullptr; -    } -} +#include "mpmc_unordered_ring.h"  +  +namespace NThreading {  +    TMPMCUnorderedRing::TMPMCUnorderedRing(size_t size) {  +        Y_VERIFY(size > 0);  +        RingSize = size;  +   
     RingBuffer.Reset(new void*[size]);  +        memset(&RingBuffer[0], 0, sizeof(void*) * size);  +    }  +  +    bool TMPMCUnorderedRing::Push(void* msg, ui16 retryCount) noexcept {  +        if (retryCount == 0) {  +            StubbornPush(msg);  +            return true;  +        }  +        for (ui16 itry = retryCount; itry-- > 0;) {  +            if (WeakPush(msg)) {  +                return true;  +            }  +        }  +        return false;  +    }  +  +    bool TMPMCUnorderedRing::WeakPush(void* msg) noexcept {  +        auto pawl = AtomicIncrement(WritePawl);  +        if (pawl - AtomicGet(ReadFront) >= RingSize) {  +            // Queue is full  +            AtomicDecrement(WritePawl);  +            return false;  +        }  +  +        auto writeSlot = AtomicGetAndIncrement(WriteFront);  +        if (AtomicCas(&RingBuffer[writeSlot % RingSize], msg, nullptr)) {  +            return true;  +        }  +        // slot is occupied for some reason, retry  +        return false;  +    }  +  +    void* TMPMCUnorderedRing::Pop() noexcept {  +        ui64 readSlot;  +  +        for (ui16 itry = MAX_POP_TRIES; itry-- > 0;) {  +            auto pawl = AtomicIncrement(ReadPawl);  +            if (pawl > AtomicGet(WriteFront)) {  +                // Queue is empty  +                AtomicDecrement(ReadPawl);  +                return nullptr;  +            }  +  +            readSlot = AtomicGetAndIncrement(ReadFront);  +  +            auto msg = AtomicSwap(&RingBuffer[readSlot % RingSize], nullptr);  +            if (msg != nullptr) {  +                return msg;  +            }  +        }  +  +        /* got no message in the slot, let's try to rollback readfront */  +        AtomicCas(&ReadFront, readSlot - 1, readSlot);  +        return nullptr;  +    }  +  +    void* TMPMCUnorderedRing::UnsafeScanningPop(ui64* last) noexcept {  +        for (; *last < RingSize;) {  +            auto msg = AtomicSwap(&RingBuffer[*last], nullptr);  +            
++*last;  +            if (msg != nullptr) {  +                return msg;  +            }  +        }  +        return nullptr;  +    }  +}  diff --git a/library/cpp/threading/queue/mpmc_unordered_ring.h b/library/cpp/threading/queue/mpmc_unordered_ring.h index 5042f7528e8..59758d2c352 100644 --- a/library/cpp/threading/queue/mpmc_unordered_ring.h +++ b/library/cpp/threading/queue/mpmc_unordered_ring.h @@ -1,42 +1,42 @@ -#pragma once - -/* -  It's not a general purpose queue. -  No order guarantee, but it mostly ordered. -  Items may stuck in almost empty queue. -  Use UnsafeScanningPop to pop all stuck items. -  Almost wait-free for producers and consumers. - */ - -#include <util/system/atomic.h> -#include <util/generic/ptr.h> - -namespace NThreading { -    struct TMPMCUnorderedRing { -    public: -        static constexpr ui16 MAX_PUSH_TRIES = 4; -        static constexpr ui16 MAX_POP_TRIES = 4; - -        TMPMCUnorderedRing(size_t size); - -        bool Push(void* msg, ui16 retryCount = MAX_PUSH_TRIES) noexcept; -        void StubbornPush(void* msg) { -            while (!WeakPush(msg)) { -            } -        } - -        void* Pop() noexcept; - -        void* UnsafeScanningPop(ui64* last) noexcept; - -    private: -        bool WeakPush(void* msg) noexcept; - -        size_t RingSize; -        TArrayPtr<void*> RingBuffer; -        ui64 WritePawl = 0; -        ui64 WriteFront = 0; -        ui64 ReadPawl = 0; -        ui64 ReadFront = 0; -    }; -} +#pragma once  +  +/*  +  It's not a general purpose queue.  +  No order guarantee, but it mostly ordered.  +  Items may stuck in almost empty queue.  +  Use UnsafeScanningPop to pop all stuck items.  +  Almost wait-free for producers and consumers.  
+ */  +  +#include <util/system/atomic.h>  +#include <util/generic/ptr.h>  +  +namespace NThreading {  +    struct TMPMCUnorderedRing {  +    public:  +        static constexpr ui16 MAX_PUSH_TRIES = 4;  +        static constexpr ui16 MAX_POP_TRIES = 4;  +  +        TMPMCUnorderedRing(size_t size);  +  +        bool Push(void* msg, ui16 retryCount = MAX_PUSH_TRIES) noexcept;  +        void StubbornPush(void* msg) {  +            while (!WeakPush(msg)) {  +            }  +        }  +  +        void* Pop() noexcept;  +  +        void* UnsafeScanningPop(ui64* last) noexcept;  +  +    private:  +        bool WeakPush(void* msg) noexcept;  +  +        size_t RingSize;  +        TArrayPtr<void*> RingBuffer;  +        ui64 WritePawl = 0;  +        ui64 WriteFront = 0;  +        ui64 ReadPawl = 0;  +        ui64 ReadFront = 0;  +    };  +}  diff --git a/library/cpp/threading/queue/mpsc_htswap.cpp b/library/cpp/threading/queue/mpsc_htswap.cpp index 610c8f67f13..d8ab0d4f488 100644 --- a/library/cpp/threading/queue/mpsc_htswap.cpp +++ b/library/cpp/threading/queue/mpsc_htswap.cpp @@ -1 +1 @@ -#include "mpsc_htswap.h" +#include "mpsc_htswap.h"  diff --git a/library/cpp/threading/queue/mpsc_htswap.h b/library/cpp/threading/queue/mpsc_htswap.h index c42caa7ac02..2d0bfd1146a 100644 --- a/library/cpp/threading/queue/mpsc_htswap.h +++ b/library/cpp/threading/queue/mpsc_htswap.h @@ -1,132 +1,132 @@ -#pragma once - -/* -  http://www.1024cores.net/home/lock-free-algorithms/queues/non-intrusive-mpsc-node-based-queue - -  Simple semi-wait-free queue. Many producers - one consumer. -  Tracking of allocated memory is not required. -  No CAS. Only atomic swap (exchange) operations. - -  WARNING: a sleeping producer can stop progress for consumer. - -  WARNING: there is no wait¬ify mechanic for consumer, -  consumer receives nullptr if queue was empty. 
- -  WARNING: the algorithm itself is lock-free -  but producers and consumer could be blocked by memory allocator - -  Reference design: rtmapreduce/libs/threading/lfqueue.h - */ - -#include <util/generic/noncopyable.h> -#include <util/system/types.h> -#include <util/system/atomic.h> - -#include "tune.h" - -namespace NThreading { -    namespace NHTSwapPrivate { -        template <typename T, typename TTuneup> -        struct TNode +#pragma once  +  +/*  +  http://www.1024cores.net/home/lock-free-algorithms/queues/non-intrusive-mpsc-node-based-queue  +  +  Simple semi-wait-free queue. Many producers - one consumer.  +  Tracking of allocated memory is not required.  +  No CAS. Only atomic swap (exchange) operations.  +  +  WARNING: a sleeping producer can stop progress for consumer.  +  +  WARNING: there is no wait¬ify mechanic for consumer,  +  consumer receives nullptr if queue was empty.  +  +  WARNING: the algorithm itself is lock-free  +  but producers and consumer could be blocked by memory allocator  +  +  Reference design: rtmapreduce/libs/threading/lfqueue.h  + */  +  +#include <util/generic/noncopyable.h>  +#include <util/system/types.h>  +#include <util/system/atomic.h>  +  +#include "tune.h"  +  +namespace NThreading {  +    namespace NHTSwapPrivate {  +        template <typename T, typename TTuneup>  +        struct TNode              : public TTuneup::TNodeBase,                public TTuneup::template TNodeLayout<TNode<T, TTuneup>, T> { -            TNode(const T& item) { -                this->Next = nullptr; -                this->Item = item; -            } - -            TNode(T&& item) { -                this->Next = nullptr; -                this->Item = std::move(item); -            } -        }; - -        struct TDefaultTuneup { -            struct TNodeBase: private TNonCopyable { -            }; - -            template <typename TNode, typename T> -            struct TNodeLayout { -                TNode* Next; -                T Item; -      
      }; - -            template <typename TNode> -            struct TQueueLayout { -                TNode* Head; -                TNode* Tail; -            }; -        }; - -        template <typename T, typename TTuneup> -        class THTSwapQueueImpl +            TNode(const T& item) {  +                this->Next = nullptr;  +                this->Item = item;  +            }  +  +            TNode(T&& item) {  +                this->Next = nullptr;  +                this->Item = std::move(item);  +            }  +        };  +  +        struct TDefaultTuneup {  +            struct TNodeBase: private TNonCopyable {  +            };  +  +            template <typename TNode, typename T>  +            struct TNodeLayout {  +                TNode* Next;  +                T Item;  +            };  +  +            template <typename TNode>  +            struct TQueueLayout {  +                TNode* Head;  +                TNode* Tail;  +            };  +        };  +  +        template <typename T, typename TTuneup>  +        class THTSwapQueueImpl              : protected  TTuneup::template TQueueLayout<TNode<T, TTuneup>> { -        protected: -            using TTunedNode = TNode<T, TTuneup>; - -        public: -            using TItem = T; - -            THTSwapQueueImpl() { -                this->Head = new TTunedNode(T()); -                this->Tail = this->Head; -            } - -            ~THTSwapQueueImpl() { -                TTunedNode* node = this->Head; -                while (node != nullptr) { -                    TTunedNode* next = node->Next; -                    delete node; -                    node = next; -                } -            } - -            template <typename TT> -            void Push(TT&& item) { -                Enqueue(new TTunedNode(std::forward<TT>(item))); -            } - -            T Peek() { -                TTunedNode* next = AtomicGet(this->Head->Next); -                if (next == nullptr) { -                    
return T(); -                } -                return next->Item; -            } - -            void Enqueue(TTunedNode* node) { -                // our goal is to avoid expensive CAS here, -                // but now consumer will be blocked until new tail linked. -                // fortunately 'window of inconsistency' is extremely small. -                TTunedNode* prev = AtomicSwap(&this->Tail, node); -                AtomicSet(prev->Next, node); -            } - -            T Pop() { -                TTunedNode* next = AtomicGet(this->Head->Next); -                if (next == nullptr) { -                    return nullptr; -                } -                auto item = std::move(next->Item); -                std::swap(this->Head, next); // no need atomic here -                delete next; -                return item; -            } - -            bool IsEmpty() const { -                TTunedNode* next = AtomicGet(this->Head->Next); -                return (next == nullptr); -            } -        }; -    } - -    DeclareTuneTypeParam(THTSwapNodeBase, TNodeBase); -    DeclareTuneTypeParam(THTSwapNodeLayout, TNodeLayout); -    DeclareTuneTypeParam(THTSwapQueueLayout, TQueueLayout); - +        protected:  +            using TTunedNode = TNode<T, TTuneup>;  +  +        public:  +            using TItem = T;  +  +            THTSwapQueueImpl() {  +                this->Head = new TTunedNode(T());  +                this->Tail = this->Head;  +            }  +  +            ~THTSwapQueueImpl() {  +                TTunedNode* node = this->Head;  +                while (node != nullptr) {  +                    TTunedNode* next = node->Next;  +                    delete node;  +                    node = next;  +                }  +            }  +  +            template <typename TT>  +            void Push(TT&& item) {  +                Enqueue(new TTunedNode(std::forward<TT>(item)));  +            }  +  +            T Peek() {  +                TTunedNode* 
next = AtomicGet(this->Head->Next);  +                if (next == nullptr) {  +                    return T();  +                }  +                return next->Item;  +            }  +  +            void Enqueue(TTunedNode* node) {  +                // our goal is to avoid expensive CAS here,  +                // but now consumer will be blocked until new tail linked.  +                // fortunately 'window of inconsistency' is extremely small.  +                TTunedNode* prev = AtomicSwap(&this->Tail, node);  +                AtomicSet(prev->Next, node);  +            }  +  +            T Pop() {  +                TTunedNode* next = AtomicGet(this->Head->Next);  +                if (next == nullptr) {  +                    return nullptr;  +                }  +                auto item = std::move(next->Item);  +                std::swap(this->Head, next); // no need atomic here  +                delete next;  +                return item;  +            }  +  +            bool IsEmpty() const {  +                TTunedNode* next = AtomicGet(this->Head->Next);  +                return (next == nullptr);  +            }  +        };  +    }  +  +    DeclareTuneTypeParam(THTSwapNodeBase, TNodeBase);  +    DeclareTuneTypeParam(THTSwapNodeLayout, TNodeLayout);  +    DeclareTuneTypeParam(THTSwapQueueLayout, TQueueLayout);  +       template <typename T = void*, typename... 
TParams> -    class THTSwapQueue +    class THTSwapQueue          : public NHTSwapPrivate::THTSwapQueueImpl<T,                                                    TTune<NHTSwapPrivate::TDefaultTuneup, TParams...>> { -    }; -} +    };  +}  diff --git a/library/cpp/threading/queue/mpsc_intrusive_unordered.cpp b/library/cpp/threading/queue/mpsc_intrusive_unordered.cpp index 3bb1a04f7e9..a6a2fcef398 100644 --- a/library/cpp/threading/queue/mpsc_intrusive_unordered.cpp +++ b/library/cpp/threading/queue/mpsc_intrusive_unordered.cpp @@ -1,79 +1,79 @@ -#include "mpsc_intrusive_unordered.h" -#include <util/system/atomic.h> - -namespace NThreading { -    void TMPSCIntrusiveUnordered::Push(TIntrusiveNode* node) noexcept { -        auto head = AtomicGet(HeadForCaS); -        for (ui32 i = NUMBER_OF_TRIES_FOR_CAS; i-- > 0;) { -            // no ABA here, because Next is exactly head -            // it does not matter how many travels head was made/ -            node->Next = head; -            auto prev = AtomicGetAndCas(&HeadForCaS, node, head); -            if (head == prev) { -                return; -            } -            head = prev; -        } -        // boring of trying to do cas, let's just swap - -        // no need for atomic here, because the next is atomic swap -        node->Next = 0; - -        head = AtomicSwap(&HeadForSwap, node); -        if (head != nullptr) { -            AtomicSet(node->Next, head); -        } else { -            // consumer must know if no other thread may access the memory, -            // setting Next to node is a way to notify consumer -            AtomicSet(node->Next, node); -        } -    } - -    TIntrusiveNode* TMPSCIntrusiveUnordered::PopMany() noexcept { -        if (NotReadyChain == nullptr) { -            auto head = AtomicSwap(&HeadForSwap, nullptr); -            NotReadyChain = head; -        } - -        if (NotReadyChain != nullptr) { -            auto next = AtomicGet(NotReadyChain->Next); -            if (next != 
nullptr) { -                auto ready = NotReadyChain; -                TIntrusiveNode* cut; -                do { -                    cut = NotReadyChain; -                    NotReadyChain = next; -                    next = AtomicGet(NotReadyChain->Next); -                    if (next == NotReadyChain) { -                        cut = NotReadyChain; -                        NotReadyChain = nullptr; -                        break; -                    } -                } while (next != nullptr); -                cut->Next = nullptr; -                return ready; -            } -        } - -        if (AtomicGet(HeadForCaS) != nullptr) { -            return AtomicSwap(&HeadForCaS, nullptr); -        } -        return nullptr; -    } - -    TIntrusiveNode* TMPSCIntrusiveUnordered::Pop() noexcept { -        if (PopOneQueue != nullptr) { -            auto head = PopOneQueue; -            PopOneQueue = PopOneQueue->Next; -            return head; -        } - -        PopOneQueue = PopMany(); -        if (PopOneQueue != nullptr) { -            auto head = PopOneQueue; -            PopOneQueue = PopOneQueue->Next; -            return head; -        } -        return nullptr; -    } -} +#include "mpsc_intrusive_unordered.h"  +#include <util/system/atomic.h>  +  +namespace NThreading {  +    void TMPSCIntrusiveUnordered::Push(TIntrusiveNode* node) noexcept {  +        auto head = AtomicGet(HeadForCaS);  +        for (ui32 i = NUMBER_OF_TRIES_FOR_CAS; i-- > 0;) {  +            // no ABA here, because Next is exactly head  +            // it does not matter how many travels head was made/  +            node->Next = head;  +            auto prev = AtomicGetAndCas(&HeadForCaS, node, head);  +            if (head == prev) {  +                return;  +            }  +            head = prev;  +        }  +        // boring of trying to do cas, let's just swap  +  +        // no need for atomic here, because the next is atomic swap  +        node->Next = 0;  +  +        
head = AtomicSwap(&HeadForSwap, node);  +        if (head != nullptr) {  +            AtomicSet(node->Next, head);  +        } else {  +            // consumer must know if no other thread may access the memory,  +            // setting Next to node is a way to notify consumer  +            AtomicSet(node->Next, node);  +        }  +    }  +  +    TIntrusiveNode* TMPSCIntrusiveUnordered::PopMany() noexcept {  +        if (NotReadyChain == nullptr) {  +            auto head = AtomicSwap(&HeadForSwap, nullptr);  +            NotReadyChain = head;  +        }  +  +        if (NotReadyChain != nullptr) {  +            auto next = AtomicGet(NotReadyChain->Next);  +            if (next != nullptr) {  +                auto ready = NotReadyChain;  +                TIntrusiveNode* cut;  +                do {  +                    cut = NotReadyChain;  +                    NotReadyChain = next;  +                    next = AtomicGet(NotReadyChain->Next);  +                    if (next == NotReadyChain) {  +                        cut = NotReadyChain;  +                        NotReadyChain = nullptr;  +                        break;  +                    }  +                } while (next != nullptr);  +                cut->Next = nullptr;  +                return ready;  +            }  +        }  +  +        if (AtomicGet(HeadForCaS) != nullptr) {  +            return AtomicSwap(&HeadForCaS, nullptr);  +        }  +        return nullptr;  +    }  +  +    TIntrusiveNode* TMPSCIntrusiveUnordered::Pop() noexcept {  +        if (PopOneQueue != nullptr) {  +            auto head = PopOneQueue;  +            PopOneQueue = PopOneQueue->Next;  +            return head;  +        }  +  +        PopOneQueue = PopMany();  +        if (PopOneQueue != nullptr) {  +            auto head = PopOneQueue;  +            PopOneQueue = PopOneQueue->Next;  +            return head;  +        }  +        return nullptr;  +    }  +}  diff --git 
a/library/cpp/threading/queue/mpsc_intrusive_unordered.h b/library/cpp/threading/queue/mpsc_intrusive_unordered.h index 6ac7537ae9a..c07cf761f67 100644 --- a/library/cpp/threading/queue/mpsc_intrusive_unordered.h +++ b/library/cpp/threading/queue/mpsc_intrusive_unordered.h @@ -1,35 +1,35 @@ -#pragma once - -/* -  Simple almost-wait-free unordered queue for low contention operations. - -  It's wait-free for producers. -  Hanging producer can hide some items from consumer. - */ - -#include <util/system/types.h> - -namespace NThreading { -    struct TIntrusiveNode { -        TIntrusiveNode* Next; -    }; - -    class TMPSCIntrusiveUnordered { -    public: -        static constexpr ui32 NUMBER_OF_TRIES_FOR_CAS = 3; - -        void Push(TIntrusiveNode* node) noexcept; -        TIntrusiveNode* PopMany() noexcept; -        TIntrusiveNode* Pop() noexcept; - -        void Push(void* node) noexcept { -            Push(reinterpret_cast<TIntrusiveNode*>(node)); -        } - -    private: -        TIntrusiveNode* HeadForCaS = nullptr; -        TIntrusiveNode* HeadForSwap = nullptr; -        TIntrusiveNode* NotReadyChain = nullptr; -        TIntrusiveNode* PopOneQueue = nullptr; -    }; -} +#pragma once  +  +/*  +  Simple almost-wait-free unordered queue for low contention operations.  +  +  It's wait-free for producers.  +  Hanging producer can hide some items from consumer.  
+ */  +  +#include <util/system/types.h>  +  +namespace NThreading {  +    struct TIntrusiveNode {  +        TIntrusiveNode* Next;  +    };  +  +    class TMPSCIntrusiveUnordered {  +    public:  +        static constexpr ui32 NUMBER_OF_TRIES_FOR_CAS = 3;  +  +        void Push(TIntrusiveNode* node) noexcept;  +        TIntrusiveNode* PopMany() noexcept;  +        TIntrusiveNode* Pop() noexcept;  +  +        void Push(void* node) noexcept {  +            Push(reinterpret_cast<TIntrusiveNode*>(node));  +        }  + +    private:  +        TIntrusiveNode* HeadForCaS = nullptr;  +        TIntrusiveNode* HeadForSwap = nullptr;  +        TIntrusiveNode* NotReadyChain = nullptr;  +        TIntrusiveNode* PopOneQueue = nullptr;  +    };  +}  diff --git a/library/cpp/threading/queue/mpsc_read_as_filled.cpp b/library/cpp/threading/queue/mpsc_read_as_filled.cpp index 8b4664a6f32..3b89fb1df62 100644 --- a/library/cpp/threading/queue/mpsc_read_as_filled.cpp +++ b/library/cpp/threading/queue/mpsc_read_as_filled.cpp @@ -1 +1 @@ -#include "mpsc_read_as_filled.h" +#include "mpsc_read_as_filled.h"  diff --git a/library/cpp/threading/queue/mpsc_read_as_filled.h b/library/cpp/threading/queue/mpsc_read_as_filled.h index be33ba5a584..4dfdb1fbbfa 100644 --- a/library/cpp/threading/queue/mpsc_read_as_filled.h +++ b/library/cpp/threading/queue/mpsc_read_as_filled.h @@ -1,611 +1,611 @@ -#pragma once - -/* -  Completely wait-free queue, multiple producers - one consumer. Strict order. -  The queue algorithm is using concept of virtual infinite array. - +#pragma once  +  +/*  +  Completely wait-free queue, multiple producers - one consumer. Strict order.  +  The queue algorithm is using concept of virtual infinite array.  +     A producer takes a number from a counter and atomically increments the counter. -  The number taken is a number of a slot for the producer to put a new message -  into infinite array. 
- -  Then producer constructs a virtual infinite array by bidirectional linked list -  of blocks. Each block contains several slots. - +  The number taken is a number of a slot for the producer to put a new message  +  into infinite array.  +  +  Then producer constructs a virtual infinite array by bidirectional linked list  +  of blocks. Each block contains several slots.  +     There is a hint pointer which optimistically points to the last block -  of the list and never goes backward. - -  Consumer exploits the property of the hint pointer always going forward -  to free old blocks eventually. Consumer periodically read the hint pointer -  and the counter and thus deduce producers which potentially holds the pointer -  to a block. Consumer can free the block if all that producers filled their -  slots and left the queue. - -  No producer can stop the progress for other producers. - -  Consumer can't stop the progress for producers. -  Consumer can skip not-yet-filled slots and read them later. -  Thus no producer can stop the progress for consumer. +  of the list and never goes backward.  +  +  Consumer exploits the property of the hint pointer always going forward  +  to free old blocks eventually. Consumer periodically read the hint pointer  +  and the counter and thus deduce producers which potentially holds the pointer  +  to a block. Consumer can free the block if all that producers filled their  +  slots and left the queue.  +  +  No producer can stop the progress for other producers.  +  +  Consumer can't stop the progress for producers.  +  Consumer can skip not-yet-filled slots and read them later.  +  Thus no producer can stop the progress for consumer.     The algorithm is virtually strictly ordered because it skips slots only -  if it is really does not matter in which order the slots were produced and -  consumed. - -  WARNING: there is no wait¬ify mechanic for consumer, -  consumer receives nullptr if queue was empty. 
- -  WARNING: though the algorithm itself is completely wait-free -  but producers and consumer could be blocked by memory allocator - +  if it is really does not matter in which order the slots were produced and  +  consumed.  +  +  WARNING: there is no wait¬ify mechanic for consumer,  +  consumer receives nullptr if queue was empty.  +  +  WARNING: though the algorithm itself is completely wait-free  +  but producers and consumer could be blocked by memory allocator  +     WARNING: copy constructors of the queue are not thread-safe - */ - -#include <util/generic/deque.h> -#include <util/generic/ptr.h> -#include <util/system/atomic.h> -#include <util/system/spinlock.h> - -#include "tune.h" - -namespace NThreading { -    namespace NReadAsFilledPrivate { -        typedef void* TMsgLink; - -        static constexpr ui32 DEFAULT_BUNCH_SIZE = 251; - -        struct TEmpty { -        }; - -        struct TEmptyAux { -            TEmptyAux Retrieve() const { -                return TEmptyAux(); -            } - -            void Store(TEmptyAux&) { -            } - -            static constexpr TEmptyAux Zero() { -                return TEmptyAux(); -            } -        }; - -        template <typename TAux> -        struct TSlot { -            TMsgLink volatile Msg; -            TAux AuxiliaryData; - -            inline void Store(TAux& aux) { -                AuxiliaryData.Store(aux); -            } - -            inline TAux Retrieve() const { -                return AuxiliaryData.Retrieve(); -            } - -            static TSlot<TAux> NullElem() { -                return {nullptr, TAux::Zero()}; -            } - -            static TSlot<TAux> Pair(TMsgLink msg, TAux aux) { -                return {msg, std::move(aux)}; -            } -        }; - -        template <> -        struct TSlot<TEmptyAux> { -            TMsgLink volatile Msg; - -            inline void Store(TEmptyAux&) { -            } - -            inline TEmptyAux Retrieve() const { -         
       return TEmptyAux(); -            } - -            static TSlot<TEmptyAux> NullElem() { -                return {nullptr}; -            } - -            static TSlot<TEmptyAux> Pair(TMsgLink msg, TEmptyAux) { -                return {msg}; -            } -        }; - -        enum TPushResult { -            PUSH_RESULT_OK, -            PUSH_RESULT_BACKWARD, -            PUSH_RESULT_FORWARD, -        }; - -        template <ui32 BUNCH_SIZE = DEFAULT_BUNCH_SIZE, -                  typename TBase = TEmpty, -                  typename TAux = TEmptyAux> -        struct TMsgBunch: public TBase { -            static constexpr size_t RELEASE_SIZE = BUNCH_SIZE * 2; - -            ui64 FirstSlot; - -            TSlot<TAux> LinkArray[BUNCH_SIZE]; - -            TMsgBunch* volatile NextBunch; -            TMsgBunch* volatile BackLink; - -            ui64 volatile Token; -            TMsgBunch* volatile NextToken; - -            /* this push can return PUSH_RESULT_BLOCKED */ + */  +  +#include <util/generic/deque.h>  +#include <util/generic/ptr.h>  +#include <util/system/atomic.h>  +#include <util/system/spinlock.h>  +  +#include "tune.h"  +  +namespace NThreading {  +    namespace NReadAsFilledPrivate {  +        typedef void* TMsgLink;  +  +        static constexpr ui32 DEFAULT_BUNCH_SIZE = 251;  +  +        struct TEmpty {  +        };  +  +        struct TEmptyAux {  +            TEmptyAux Retrieve() const {  +                return TEmptyAux();  +            }  +  +            void Store(TEmptyAux&) {  +            }  +  +            static constexpr TEmptyAux Zero() {  +                return TEmptyAux();  +            }  +        };  +  +        template <typename TAux>  +        struct TSlot {  +            TMsgLink volatile Msg;  +            TAux AuxiliaryData;  +  +            inline void Store(TAux& aux) {  +                AuxiliaryData.Store(aux);  +            }  +  +            inline TAux Retrieve() const {  +                return 
AuxiliaryData.Retrieve();  +            }  +  +            static TSlot<TAux> NullElem() {  +                return {nullptr, TAux::Zero()};  +            }  +  +            static TSlot<TAux> Pair(TMsgLink msg, TAux aux) {  +                return {msg, std::move(aux)};  +            }  +        };  +  +        template <>  +        struct TSlot<TEmptyAux> {  +            TMsgLink volatile Msg;  +  +            inline void Store(TEmptyAux&) {  +            }  +  +            inline TEmptyAux Retrieve() const {  +                return TEmptyAux();  +            }  +  +            static TSlot<TEmptyAux> NullElem() {  +                return {nullptr};  +            }  +  +            static TSlot<TEmptyAux> Pair(TMsgLink msg, TEmptyAux) {  +                return {msg};  +            }  +        };  +  +        enum TPushResult {  +            PUSH_RESULT_OK,  +            PUSH_RESULT_BACKWARD,  +            PUSH_RESULT_FORWARD,  +        };  +  +        template <ui32 BUNCH_SIZE = DEFAULT_BUNCH_SIZE,  +                  typename TBase = TEmpty,  +                  typename TAux = TEmptyAux>  +        struct TMsgBunch: public TBase {  +            static constexpr size_t RELEASE_SIZE = BUNCH_SIZE * 2;  +  +            ui64 FirstSlot;  +  +            TSlot<TAux> LinkArray[BUNCH_SIZE];  +  +            TMsgBunch* volatile NextBunch;  +            TMsgBunch* volatile BackLink;  +  +            ui64 volatile Token;  +            TMsgBunch* volatile NextToken;  +  +            /* this push can return PUSH_RESULT_BLOCKED */               inline TPushResult Push(TMsgLink msg, ui64 slot, TAux auxiliary) { -                if (Y_UNLIKELY(slot < FirstSlot)) { -                    return PUSH_RESULT_BACKWARD; -                } - -                if (Y_UNLIKELY(slot >= FirstSlot + BUNCH_SIZE)) { -                    return PUSH_RESULT_FORWARD; -                } - -                LinkArray[slot - FirstSlot].Store(auxiliary); - -                AtomicSet(LinkArray[slot - 
FirstSlot].Msg, msg); -                return PUSH_RESULT_OK; -            } - -            inline bool IsSlotHere(ui64 slot) { -                return slot < FirstSlot + BUNCH_SIZE; -            } - -            inline TMsgLink GetSlot(ui64 slot) const { -                return AtomicGet(LinkArray[slot - FirstSlot].Msg); -            } - -            inline TSlot<TAux> GetSlotAux(ui64 slot) const { -                auto msg = GetSlot(slot); -                auto aux = LinkArray[slot - FirstSlot].Retrieve(); -                return TSlot<TAux>::Pair(msg, aux); -            } - -            inline TMsgBunch* GetNextBunch() const { -                return AtomicGet(NextBunch); -            } - -            inline bool SetNextBunch(TMsgBunch* ptr) { -                return AtomicCas(&NextBunch, ptr, nullptr); -            } - -            inline TMsgBunch* GetBackLink() const { -                return AtomicGet(BackLink); -            } - -            inline TMsgBunch* GetToken(ui64 slot) { -                return reinterpret_cast<TMsgBunch*>( -                    LinkArray[slot - FirstSlot].Msg); -            } - -            inline void IncrementToken() { -                AtomicIncrement(Token); -            } - -            // the object could be destroyed after this method -            inline void DecrementToken() { -                if (Y_UNLIKELY(AtomicDecrement(Token) == RELEASE_SIZE)) { -                    Release(this); -                    AtomicGet(NextToken)->DecrementToken(); -                    // this could be invalid here -                } -            } - -            // the object could be destroyed after this method -            inline void SetNextToken(TMsgBunch* next) { -                AtomicSet(NextToken, next); +                if (Y_UNLIKELY(slot < FirstSlot)) {  +                    return PUSH_RESULT_BACKWARD;  +                }  +  +                if (Y_UNLIKELY(slot >= FirstSlot + BUNCH_SIZE)) {  +                    return 
PUSH_RESULT_FORWARD;  +                }  +  +                LinkArray[slot - FirstSlot].Store(auxiliary);  +  +                AtomicSet(LinkArray[slot - FirstSlot].Msg, msg);  +                return PUSH_RESULT_OK;  +            }  +  +            inline bool IsSlotHere(ui64 slot) {  +                return slot < FirstSlot + BUNCH_SIZE;  +            }  +  +            inline TMsgLink GetSlot(ui64 slot) const {  +                return AtomicGet(LinkArray[slot - FirstSlot].Msg);  +            }  +  +            inline TSlot<TAux> GetSlotAux(ui64 slot) const {  +                auto msg = GetSlot(slot);  +                auto aux = LinkArray[slot - FirstSlot].Retrieve();  +                return TSlot<TAux>::Pair(msg, aux);  +            }  +  +            inline TMsgBunch* GetNextBunch() const {  +                return AtomicGet(NextBunch);  +            }  +  +            inline bool SetNextBunch(TMsgBunch* ptr) {  +                return AtomicCas(&NextBunch, ptr, nullptr);  +            }  +  +            inline TMsgBunch* GetBackLink() const {  +                return AtomicGet(BackLink);  +            }  +  +            inline TMsgBunch* GetToken(ui64 slot) {  +                return reinterpret_cast<TMsgBunch*>(  +                    LinkArray[slot - FirstSlot].Msg);  +            }  +  +            inline void IncrementToken() {  +                AtomicIncrement(Token);  +            }  +  +            // the object could be destroyed after this method  +            inline void DecrementToken() {  +                if (Y_UNLIKELY(AtomicDecrement(Token) == RELEASE_SIZE)) {  +                    Release(this);  +                    AtomicGet(NextToken)->DecrementToken();  +                    // this could be invalid here  +                }  +            }  +  +            // the object could be destroyed after this method  +            inline void SetNextToken(TMsgBunch* next) {  +                AtomicSet(NextToken, next);                   if 
(Y_UNLIKELY(AtomicAdd(Token, RELEASE_SIZE) == RELEASE_SIZE)) { -                    Release(this); -                    next->DecrementToken(); -                } -                // this could be invalid here -            } - -            TMsgBunch(ui64 start, TMsgBunch* backLink) { -                AtomicSet(FirstSlot, start); -                memset(&LinkArray, 0, sizeof(LinkArray)); -                AtomicSet(NextBunch, nullptr); -                AtomicSet(BackLink, backLink); - -                AtomicSet(Token, 1); -                AtomicSet(NextToken, nullptr); -            } - -            static void Release(TMsgBunch* block) { -                auto backLink = AtomicGet(block->BackLink); -                if (backLink == nullptr) { -                    return; -                } -                AtomicSet(block->BackLink, nullptr); - -                do { -                    auto bbackLink = backLink->BackLink; -                    delete backLink; -                    backLink = bbackLink; -                } while (backLink != nullptr); -            } - -            void Destroy() { -                for (auto tail = BackLink; tail != nullptr;) { -                    auto next = tail->BackLink; -                    delete tail; -                    tail = next; -                } - -                for (auto next = this; next != nullptr;) { -                    auto nnext = next->NextBunch; -                    delete next; -                    next = nnext; -                } -            } -        }; - -        template <ui32 BUNCH_SIZE = DEFAULT_BUNCH_SIZE, -                  typename TBunchBase = NReadAsFilledPrivate::TEmpty, -                  typename TAux = TEmptyAux> -        class TWriteBucket { -        public: -            using TUsingAux = TAux; // for TReadBucket binding -            using TBunch = TMsgBunch<BUNCH_SIZE, TBunchBase, TAux>; - -            TWriteBucket(TBunch* bunch = new TBunch(0, nullptr)) { -                
AtomicSet(LastBunch, bunch); -                AtomicSet(SlotCounter, 0); -            } - -            TWriteBucket(TWriteBucket&& move) -                : LastBunch(move.LastBunch) -                , SlotCounter(move.SlotCounter) -            { -                move.LastBunch = nullptr; -            } - -            ~TWriteBucket() { -                if (LastBunch != nullptr) { -                    LastBunch->Destroy(); -                } -            } - -            inline void Push(TMsgLink msg, TAux aux) { -                ui64 pushSlot = AtomicGetAndIncrement(SlotCounter); -                TBunch* hintBunch = GetLastBunch(); - -                for (;;) { -                    auto hint = hintBunch->Push(msg, pushSlot, aux); -                    if (Y_LIKELY(hint == PUSH_RESULT_OK)) { -                        return; -                    } -                    HandleHint(hintBunch, hint); -                } -            } - -        protected: -            template <typename, template <typename, typename...> class> -            friend class TReadBucket; - -            TBunch* volatile LastBunch; // Hint -            volatile ui64 SlotCounter; - -            inline TBunch* GetLastBunch() const { -                return AtomicGet(LastBunch); -            } - -            void HandleHint(TBunch*& hintBunch, TPushResult hint) { -                if (Y_UNLIKELY(hint == PUSH_RESULT_BACKWARD)) { -                    hintBunch = hintBunch->GetBackLink(); -                    return; -                } - -                // PUSH_RESULT_FORWARD -                auto nextBunch = hintBunch->GetNextBunch(); - -                if (nextBunch == nullptr) { -                    auto first = hintBunch->FirstSlot + BUNCH_SIZE; -                    nextBunch = new TBunch(first, hintBunch); -                    if (Y_UNLIKELY(!hintBunch->SetNextBunch(nextBunch))) { -                        delete nextBunch; -                        nextBunch = hintBunch->GetNextBunch(); -            
        } -                } - -                // hintBunch could not be freed here so it cannot be reused -                // it's alright if this CAS was not succeeded, -                // it means that other thread did that recently -                AtomicCas(&LastBunch, nextBunch, hintBunch); - -                hintBunch = nextBunch; -            } -        }; - +                    Release(this);  +                    next->DecrementToken();  +                }  +                // this could be invalid here  +            }  +  +            TMsgBunch(ui64 start, TMsgBunch* backLink) {  +                AtomicSet(FirstSlot, start);  +                memset(&LinkArray, 0, sizeof(LinkArray));  +                AtomicSet(NextBunch, nullptr);  +                AtomicSet(BackLink, backLink);  +  +                AtomicSet(Token, 1);  +                AtomicSet(NextToken, nullptr);  +            }  +  +            static void Release(TMsgBunch* block) {  +                auto backLink = AtomicGet(block->BackLink);  +                if (backLink == nullptr) {  +                    return;  +                }  +                AtomicSet(block->BackLink, nullptr);  +  +                do {  +                    auto bbackLink = backLink->BackLink;  +                    delete backLink;  +                    backLink = bbackLink;  +                } while (backLink != nullptr);  +            }  +  +            void Destroy() {  +                for (auto tail = BackLink; tail != nullptr;) {  +                    auto next = tail->BackLink;  +                    delete tail;  +                    tail = next;  +                }  +  +                for (auto next = this; next != nullptr;) {  +                    auto nnext = next->NextBunch;  +                    delete next;  +                    next = nnext;  +                }  +            }  +        };  +  +        template <ui32 BUNCH_SIZE = DEFAULT_BUNCH_SIZE,  +                  typename TBunchBase = 
NReadAsFilledPrivate::TEmpty,  +                  typename TAux = TEmptyAux>  +        class TWriteBucket {  +        public:  +            using TUsingAux = TAux; // for TReadBucket binding  +            using TBunch = TMsgBunch<BUNCH_SIZE, TBunchBase, TAux>;  +  +            TWriteBucket(TBunch* bunch = new TBunch(0, nullptr)) {  +                AtomicSet(LastBunch, bunch);  +                AtomicSet(SlotCounter, 0);  +            }  +  +            TWriteBucket(TWriteBucket&& move)  +                : LastBunch(move.LastBunch)  +                , SlotCounter(move.SlotCounter)  +            {  +                move.LastBunch = nullptr;  +            }  +  +            ~TWriteBucket() {  +                if (LastBunch != nullptr) {  +                    LastBunch->Destroy();  +                }  +            }  +  +            inline void Push(TMsgLink msg, TAux aux) {  +                ui64 pushSlot = AtomicGetAndIncrement(SlotCounter);  +                TBunch* hintBunch = GetLastBunch();  +  +                for (;;) {  +                    auto hint = hintBunch->Push(msg, pushSlot, aux);  +                    if (Y_LIKELY(hint == PUSH_RESULT_OK)) {  +                        return;  +                    }  +                    HandleHint(hintBunch, hint);  +                }  +            }  +  +        protected:  +            template <typename, template <typename, typename...> class>  +            friend class TReadBucket;  +  +            TBunch* volatile LastBunch; // Hint  +            volatile ui64 SlotCounter;  +  +            inline TBunch* GetLastBunch() const {  +                return AtomicGet(LastBunch);  +            }  +  +            void HandleHint(TBunch*& hintBunch, TPushResult hint) {  +                if (Y_UNLIKELY(hint == PUSH_RESULT_BACKWARD)) {  +                    hintBunch = hintBunch->GetBackLink();  +                    return;  +                }  +  +                // PUSH_RESULT_FORWARD  +                auto nextBunch = 
hintBunch->GetNextBunch();  +  +                if (nextBunch == nullptr) {  +                    auto first = hintBunch->FirstSlot + BUNCH_SIZE;  +                    nextBunch = new TBunch(first, hintBunch);  +                    if (Y_UNLIKELY(!hintBunch->SetNextBunch(nextBunch))) {  +                        delete nextBunch;  +                        nextBunch = hintBunch->GetNextBunch();  +                    }  +                }  +  +                // hintBunch could not be freed here so it cannot be reused  +                // it's alright if this CAS was not succeeded,  +                // it means that other thread did that recently  +                AtomicCas(&LastBunch, nextBunch, hintBunch);  +  +                hintBunch = nextBunch;  +            }  +        };  +           template <typename TWBucket = TWriteBucket<>,                    template <typename, typename...> class TContainer = TDeque> -        class TReadBucket { -        public: -            using TAux = typename TWBucket::TUsingAux; -            using TBunch = typename TWBucket::TBunch; - -            static constexpr int MAX_NUMBER_OF_TRIES_TO_READ = 5; - -            TReadBucket(TWBucket* writer) -                : Writer(writer) -                , ReadBunch(writer->GetLastBunch()) -                , LastKnownPushBunch(writer->GetLastBunch()) -            { -                ReadBunch->DecrementToken(); // no previous token -            } - -            TReadBucket(TReadBucket toCopy, TWBucket* writer) -                : TReadBucket(std::move(toCopy)) -            { -                Writer = writer; -            } - -            ui64 ReadyCount() const { -                return AtomicGet(Writer->SlotCounter) - ReadSlot; -            } - -            TMsgLink Pop() { -                return PopAux().Msg; -            } - -            TMsgLink Peek() { -                return PeekAux().Msg; -            } - -            TSlot<TAux> PopAux() { -                for (;;) { -                
    if (Y_UNLIKELY(ReadNow.size() != 0)) { -                        auto result = PopSkipped(); -                        if (Y_LIKELY(result.Msg != nullptr)) { -                            return result; -                        } -                    } - -                    if (Y_UNLIKELY(ReadSlot == LastKnownPushSlot)) { -                        if (Y_LIKELY(!RereadPushSlot())) { -                            return TSlot<TAux>::NullElem(); -                        } -                        continue; -                    } - -                    if (Y_UNLIKELY(!ReadBunch->IsSlotHere(ReadSlot))) { -                        if (Y_UNLIKELY(!SwitchToNextBunch())) { -                            return TSlot<TAux>::NullElem(); -                        } -                    } - -                    auto result = ReadBunch->GetSlotAux(ReadSlot); -                    if (Y_LIKELY(result.Msg != nullptr)) { -                        ++ReadSlot; -                        return result; -                    } - -                    result = StubbornPop(); -                    if (Y_LIKELY(result.Msg != nullptr)) { -                        return result; -                    } -                } -            } - -            TSlot<TAux> PeekAux() { -                for (;;) { -                    if (Y_UNLIKELY(ReadNow.size() != 0)) { -                        auto result = PeekSkipped(); -                        if (Y_LIKELY(result.Msg != nullptr)) { -                            return result; -                        } -                    } - -                    if (Y_UNLIKELY(ReadSlot == LastKnownPushSlot)) { -                        if (Y_LIKELY(!RereadPushSlot())) { -                            return TSlot<TAux>::NullElem(); -                        } -                        continue; -                    } - -                    if (Y_UNLIKELY(!ReadBunch->IsSlotHere(ReadSlot))) { -                        if (Y_UNLIKELY(!SwitchToNextBunch())) { -                         
   return TSlot<TAux>::NullElem(); -                        } -                    } - -                    auto result = ReadBunch->GetSlotAux(ReadSlot); -                    if (Y_LIKELY(result.Msg != nullptr)) { -                        return result; -                    } - -                    result = StubbornPeek(); -                    if (Y_LIKELY(result.Msg != nullptr)) { -                        return result; -                    } -                } -            } - -        private: -            TWBucket* Writer; -            TBunch* ReadBunch; -            ui64 ReadSlot = 0; -            TBunch* LastKnownPushBunch; -            ui64 LastKnownPushSlot = 0; - -            struct TSkipItem { -                TBunch* Bunch; -                ui64 Slot; -                TBunch* Token; -            }; - -            TContainer<TSkipItem> ReadNow; -            TContainer<TSkipItem> ReadLater; - -            void AddToReadLater() { -                ReadLater.push_back({ReadBunch, ReadSlot, LastKnownPushBunch}); -                LastKnownPushBunch->IncrementToken(); -                ++ReadSlot; -            } - -            // MUST BE: ReadSlot == LastKnownPushSlot -            bool RereadPushSlot() { -                ReadNow = std::move(ReadLater); -                ReadLater.clear(); - -                auto oldSlot = LastKnownPushSlot; - -                auto currentPushBunch = Writer->GetLastBunch(); -                auto currentPushSlot = AtomicGet(Writer->SlotCounter); - -                if (currentPushBunch != LastKnownPushBunch) { -                    // LastKnownPushBunch could be invalid after this line -                    LastKnownPushBunch->SetNextToken(currentPushBunch); -                } - -                LastKnownPushBunch = currentPushBunch; -                LastKnownPushSlot = currentPushSlot; - -                return oldSlot != LastKnownPushSlot; -            } - -            bool SwitchToNextBunch() { -                for (int q = 0; q < 
MAX_NUMBER_OF_TRIES_TO_READ; ++q) { -                    auto next = ReadBunch->GetNextBunch(); -                    if (next != nullptr) { -                        ReadBunch = next; -                        return true; -                    } -                    SpinLockPause(); -                } -                return false; -            } - -            TSlot<TAux> StubbornPop() { -                for (int q = 0; q < MAX_NUMBER_OF_TRIES_TO_READ; ++q) { -                    auto result = ReadBunch->GetSlotAux(ReadSlot); -                    if (Y_LIKELY(result.Msg != nullptr)) { -                        ++ReadSlot; -                        return result; -                    } -                    SpinLockPause(); -                } - -                AddToReadLater(); -                return TSlot<TAux>::NullElem(); -            } - -            TSlot<TAux> StubbornPeek() { -                for (int q = 0; q < MAX_NUMBER_OF_TRIES_TO_READ; ++q) { -                    auto result = ReadBunch->GetSlotAux(ReadSlot); -                    if (Y_LIKELY(result.Msg != nullptr)) { -                        return result; -                    } -                    SpinLockPause(); -                } - -                AddToReadLater(); -                return TSlot<TAux>::NullElem(); -            } - -            TSlot<TAux> PopSkipped() { -                do { -                    auto elem = ReadNow.front(); -                    ReadNow.pop_front(); - -                    auto result = elem.Bunch->GetSlotAux(elem.Slot); -                    if (Y_LIKELY(result.Msg != nullptr)) { -                        elem.Token->DecrementToken(); -                        return result; -                    } - -                    ReadLater.emplace_back(elem); - -                } while (ReadNow.size() > 0); - -                return TSlot<TAux>::NullElem(); -            } - -            TSlot<TAux> PeekSkipped() { -                do { -                    auto elem = 
ReadNow.front(); - -                    auto result = elem.Bunch->GetSlotAux(elem.Slot); -                    if (Y_LIKELY(result.Msg != nullptr)) { -                        return result; -                    } - -                    ReadNow.pop_front(); -                    ReadLater.emplace_back(elem); - -                } while (ReadNow.size() > 0); - -                return TSlot<TAux>::NullElem(); -            } -        }; - -        struct TDefaultParams { -            static constexpr ui32 BUNCH_SIZE = DEFAULT_BUNCH_SIZE; -            using TBunchBase = TEmpty; - +        class TReadBucket {  +        public:  +            using TAux = typename TWBucket::TUsingAux;  +            using TBunch = typename TWBucket::TBunch;  +  +            static constexpr int MAX_NUMBER_OF_TRIES_TO_READ = 5;  +  +            TReadBucket(TWBucket* writer)  +                : Writer(writer)  +                , ReadBunch(writer->GetLastBunch())  +                , LastKnownPushBunch(writer->GetLastBunch())  +            {  +                ReadBunch->DecrementToken(); // no previous token  +            }  +  +            TReadBucket(TReadBucket toCopy, TWBucket* writer)  +                : TReadBucket(std::move(toCopy))  +            {  +                Writer = writer;  +            }  +  +            ui64 ReadyCount() const {  +                return AtomicGet(Writer->SlotCounter) - ReadSlot;  +            }  +  +            TMsgLink Pop() {  +                return PopAux().Msg;  +            }  +  +            TMsgLink Peek() {  +                return PeekAux().Msg;  +            }  +  +            TSlot<TAux> PopAux() {  +                for (;;) {  +                    if (Y_UNLIKELY(ReadNow.size() != 0)) {  +                        auto result = PopSkipped();  +                        if (Y_LIKELY(result.Msg != nullptr)) {  +                            return result;  +                        }  +                    }  +  +                    if (Y_UNLIKELY(ReadSlot == 
LastKnownPushSlot)) {  +                        if (Y_LIKELY(!RereadPushSlot())) {  +                            return TSlot<TAux>::NullElem();  +                        }  +                        continue;  +                    }  +  +                    if (Y_UNLIKELY(!ReadBunch->IsSlotHere(ReadSlot))) {  +                        if (Y_UNLIKELY(!SwitchToNextBunch())) {  +                            return TSlot<TAux>::NullElem();  +                        }  +                    }  +  +                    auto result = ReadBunch->GetSlotAux(ReadSlot);  +                    if (Y_LIKELY(result.Msg != nullptr)) {  +                        ++ReadSlot;  +                        return result;  +                    }  +  +                    result = StubbornPop();  +                    if (Y_LIKELY(result.Msg != nullptr)) {  +                        return result;  +                    }  +                }  +            }  +  +            TSlot<TAux> PeekAux() {  +                for (;;) {  +                    if (Y_UNLIKELY(ReadNow.size() != 0)) {  +                        auto result = PeekSkipped();  +                        if (Y_LIKELY(result.Msg != nullptr)) {  +                            return result;  +                        }  +                    }  +  +                    if (Y_UNLIKELY(ReadSlot == LastKnownPushSlot)) {  +                        if (Y_LIKELY(!RereadPushSlot())) {  +                            return TSlot<TAux>::NullElem();  +                        }  +                        continue;  +                    }  +  +                    if (Y_UNLIKELY(!ReadBunch->IsSlotHere(ReadSlot))) {  +                        if (Y_UNLIKELY(!SwitchToNextBunch())) {  +                            return TSlot<TAux>::NullElem();  +                        }  +                    }  +  +                    auto result = ReadBunch->GetSlotAux(ReadSlot);  +                    if (Y_LIKELY(result.Msg != nullptr)) {  +                        return 
result;  +                    }  +  +                    result = StubbornPeek();  +                    if (Y_LIKELY(result.Msg != nullptr)) {  +                        return result;  +                    }  +                }  +            }  +  +        private:  +            TWBucket* Writer;  +            TBunch* ReadBunch;  +            ui64 ReadSlot = 0;  +            TBunch* LastKnownPushBunch;  +            ui64 LastKnownPushSlot = 0;  +  +            struct TSkipItem {  +                TBunch* Bunch;  +                ui64 Slot;  +                TBunch* Token;  +            };  +  +            TContainer<TSkipItem> ReadNow;  +            TContainer<TSkipItem> ReadLater;  +  +            void AddToReadLater() {  +                ReadLater.push_back({ReadBunch, ReadSlot, LastKnownPushBunch});  +                LastKnownPushBunch->IncrementToken();  +                ++ReadSlot;  +            }  +  +            // MUST BE: ReadSlot == LastKnownPushSlot  +            bool RereadPushSlot() {  +                ReadNow = std::move(ReadLater);  +                ReadLater.clear();  +  +                auto oldSlot = LastKnownPushSlot;  +  +                auto currentPushBunch = Writer->GetLastBunch();  +                auto currentPushSlot = AtomicGet(Writer->SlotCounter);  +  +                if (currentPushBunch != LastKnownPushBunch) {  +                    // LastKnownPushBunch could be invalid after this line  +                    LastKnownPushBunch->SetNextToken(currentPushBunch);  +                }  +  +                LastKnownPushBunch = currentPushBunch;  +                LastKnownPushSlot = currentPushSlot;  +  +                return oldSlot != LastKnownPushSlot;  +            }  +  +            bool SwitchToNextBunch() {  +                for (int q = 0; q < MAX_NUMBER_OF_TRIES_TO_READ; ++q) {  +                    auto next = ReadBunch->GetNextBunch();  +                    if (next != nullptr) {  +                        ReadBunch = next;  +      
                  return true;  +                    }  +                    SpinLockPause();  +                }  +                return false;  +            }  +  +            TSlot<TAux> StubbornPop() {  +                for (int q = 0; q < MAX_NUMBER_OF_TRIES_TO_READ; ++q) {  +                    auto result = ReadBunch->GetSlotAux(ReadSlot);  +                    if (Y_LIKELY(result.Msg != nullptr)) {  +                        ++ReadSlot;  +                        return result;  +                    }  +                    SpinLockPause();  +                }  +  +                AddToReadLater();  +                return TSlot<TAux>::NullElem();  +            }  +  +            TSlot<TAux> StubbornPeek() {  +                for (int q = 0; q < MAX_NUMBER_OF_TRIES_TO_READ; ++q) {  +                    auto result = ReadBunch->GetSlotAux(ReadSlot);  +                    if (Y_LIKELY(result.Msg != nullptr)) {  +                        return result;  +                    }  +                    SpinLockPause();  +                }  +  +                AddToReadLater();  +                return TSlot<TAux>::NullElem();  +            }  +  +            TSlot<TAux> PopSkipped() {  +                do {  +                    auto elem = ReadNow.front();  +                    ReadNow.pop_front();  +  +                    auto result = elem.Bunch->GetSlotAux(elem.Slot);  +                    if (Y_LIKELY(result.Msg != nullptr)) {  +                        elem.Token->DecrementToken();  +                        return result;  +                    }  +  +                    ReadLater.emplace_back(elem);  +  +                } while (ReadNow.size() > 0);  +  +                return TSlot<TAux>::NullElem();  +            }  +  +            TSlot<TAux> PeekSkipped() {  +                do {  +                    auto elem = ReadNow.front();  +  +                    auto result = elem.Bunch->GetSlotAux(elem.Slot);  +                    if (Y_LIKELY(result.Msg != 
nullptr)) {  +                        return result;  +                    }  +  +                    ReadNow.pop_front();  +                    ReadLater.emplace_back(elem);  +  +                } while (ReadNow.size() > 0);  +  +                return TSlot<TAux>::NullElem();  +            }  +        };  +  +        struct TDefaultParams {  +            static constexpr ui32 BUNCH_SIZE = DEFAULT_BUNCH_SIZE;  +            using TBunchBase = TEmpty;  +               template <typename TElem, typename... TRest>              using TContainer = TDeque<TElem, TRest...>; - -            static constexpr bool DeleteItems = true; -        }; - -    } //namespace NReadAsFilledPrivate - -    DeclareTuneValueParam(TRaFQueueBunchSize, ui32, BUNCH_SIZE); -    DeclareTuneTypeParam(TRaFQueueBunchBase, TBunchBase); -    DeclareTuneContainer(TRaFQueueSkipContainer, TContainer); -    DeclareTuneValueParam(TRaFQueueDeleteItems, bool, DeleteItems); - +  +            static constexpr bool DeleteItems = true;  +        };  +  +    } //namespace NReadAsFilledPrivate  +  +    DeclareTuneValueParam(TRaFQueueBunchSize, ui32, BUNCH_SIZE);  +    DeclareTuneTypeParam(TRaFQueueBunchBase, TBunchBase);  +    DeclareTuneContainer(TRaFQueueSkipContainer, TContainer);  +    DeclareTuneValueParam(TRaFQueueDeleteItems, bool, DeleteItems);  +       template <typename TItem = void, typename... TParams> -    class TReadAsFilledQueue { -    private: -        using TTuned = TTune<NReadAsFilledPrivate::TDefaultParams, TParams...>; - -        static constexpr ui32 BUNCH_SIZE = TTuned::BUNCH_SIZE; - -        using TBunchBase = typename TTuned::TBunchBase; - +    class TReadAsFilledQueue {  +    private:  +        using TTuned = TTune<NReadAsFilledPrivate::TDefaultParams, TParams...>;  +  +        static constexpr ui32 BUNCH_SIZE = TTuned::BUNCH_SIZE;  +  +        using TBunchBase = typename TTuned::TBunchBase;  +           template <typename TElem, typename... 
TRest> -        using TContainer = -            typename TTuned::template TContainer<TElem, TRest...>; - -        using TWriteBucket = -            NReadAsFilledPrivate::TWriteBucket<BUNCH_SIZE, TBunchBase>; -        using TReadBucket = -            NReadAsFilledPrivate::TReadBucket<TWriteBucket, TContainer>; - -    public: -        TReadAsFilledQueue() -            : RBucket(&WBucket) -        { -        } - -        ~TReadAsFilledQueue() { -            if (TTuned::DeleteItems) { -                for (;;) { -                    auto msg = Pop(); -                    if (msg == nullptr) { -                        break; -                    } -                    TDelete::Destroy(msg); -                } -            } -        } - -        void Push(TItem* msg) { -            WBucket.Push((void*)msg, NReadAsFilledPrivate::TEmptyAux()); -        } - -        TItem* Pop() { -            return (TItem*)RBucket.Pop(); -        } - -        TItem* Peek() { -            return (TItem*)RBucket.Peek(); -        } - -    protected: -        TWriteBucket WBucket; -        TReadBucket RBucket; -    }; -} +        using TContainer =  +            typename TTuned::template TContainer<TElem, TRest...>;  +  +        using TWriteBucket =  +            NReadAsFilledPrivate::TWriteBucket<BUNCH_SIZE, TBunchBase>;  +        using TReadBucket =  +            NReadAsFilledPrivate::TReadBucket<TWriteBucket, TContainer>;  +  +    public:  +        TReadAsFilledQueue()  +            : RBucket(&WBucket)  +        {  +        }  +  +        ~TReadAsFilledQueue() {  +            if (TTuned::DeleteItems) {  +                for (;;) {  +                    auto msg = Pop();  +                    if (msg == nullptr) {  +                        break;  +                    }  +                    TDelete::Destroy(msg);  +                }  +            }  +        }  +  +        void Push(TItem* msg) {  +            WBucket.Push((void*)msg, NReadAsFilledPrivate::TEmptyAux());  +        }  +  +  
      TItem* Pop() {  +            return (TItem*)RBucket.Pop();  +        }  +  +        TItem* Peek() {  +            return (TItem*)RBucket.Peek();  +        }  +  +    protected:  +        TWriteBucket WBucket;  +        TReadBucket RBucket;  +    };  +}  diff --git a/library/cpp/threading/queue/mpsc_vinfarr_obstructive.cpp b/library/cpp/threading/queue/mpsc_vinfarr_obstructive.cpp index 2bd0c298216..00dbfeaa64e 100644 --- a/library/cpp/threading/queue/mpsc_vinfarr_obstructive.cpp +++ b/library/cpp/threading/queue/mpsc_vinfarr_obstructive.cpp @@ -1 +1 @@ -#include "mpsc_vinfarr_obstructive.h" +#include "mpsc_vinfarr_obstructive.h"  diff --git a/library/cpp/threading/queue/mpsc_vinfarr_obstructive.h b/library/cpp/threading/queue/mpsc_vinfarr_obstructive.h index 5f91f1b5a84..3e1ae923420 100644 --- a/library/cpp/threading/queue/mpsc_vinfarr_obstructive.h +++ b/library/cpp/threading/queue/mpsc_vinfarr_obstructive.h @@ -1,528 +1,528 @@ -#pragma once - -/* -  Semi-wait-free queue, multiple producers - one consumer. Strict order. -  The queue algorithm is using concept of virtual infinite array. - -  A producer takes a number from a counter and atomicaly increments the counter. -  The number taken is a number of a slot for the producer to put a new message -  into infinite array. - -  Then producer constructs a virtual infinite array by bidirectional linked list -  of blocks. Each block contains several slots. - -  There is a hint pointer which optimisticly points to the last block -  of the list and never goes backward. - -  Consumer exploits the property of the hint pointer always going forward -  to free old blocks eventually. Consumer periodically read the hint pointer -  and the counter and thus deduce producers which potentially holds the pointer -  to a block. Consumer can free the block if all that producers filled their -  slots and left the queue. - -  No producer can stop the progress for other producers. 
- -  Consumer can obstruct a slot of a delayed producer by putting special mark. -  Thus no producer can stop the progress for consumer. -  But a slow producer may be forced to retry unlimited number of times. -  Though it's very unlikely for a non-preempted producer to be obstructed. -  That's why the algorithm is semi-wait-free. - -  WARNING: there is no wait¬ify mechanic for consumer, -  consumer receives nullptr if queue was empty. - -  WARNING: though the algorithm itself is lock-free -  but producers and consumer could be blocked by memory allocator - -  WARNING: copy constructers of the queue are not thread-safe - */ - -#include <util/generic/noncopyable.h> -#include <util/generic/ptr.h> -#include <util/system/atomic.h> -#include <util/system/spinlock.h> - -#include "tune.h" - -namespace NThreading { -    namespace NObstructiveQueuePrivate { -        typedef void* TMsgLink; - -        struct TEmpty { -        }; - -        struct TEmptyAux { -            TEmptyAux Retrieve() const { -                return TEmptyAux(); -            } -            void Store(TEmptyAux&) { -            } -            static constexpr TEmptyAux Zero() { -                return TEmptyAux(); -            } -        }; - -        template <typename TAux> -        struct TSlot { -            TMsgLink volatile Msg; -            TAux AuxiliaryData; - -            inline void Store(TAux& aux) { -                AuxiliaryData.Store(aux); -            } - -            inline TAux Retrieve() const { -                return AuxiliaryData.Retrieve(); -            } - -            static TSlot<TAux> NullElem() { -                return {nullptr, TAux::Zero()}; -            } - -            static TSlot<TAux> Pair(TMsgLink msg, TAux aux) { -                return {msg, std::move(aux)}; -            } -        }; - -        template <> -        struct TSlot<TEmptyAux> { -            TMsgLink volatile Msg; -            inline void Store(TEmptyAux&) { -            } -            inline 
TEmptyAux Retrieve() const { -                return TEmptyAux(); -            } - -            static TSlot<TEmptyAux> NullElem() { -                return {nullptr}; -            } - -            static TSlot<TEmptyAux> Pair(TMsgLink msg, TEmptyAux) { -                return {msg}; -            } -        }; - -        enum TPushResult { -            PUSH_RESULT_OK, -            PUSH_RESULT_BACKWARD, -            PUSH_RESULT_FORWARD, -            PUSH_RESULT_BLOCKED, -        }; - -        template <typename TAux, ui32 BUNCH_SIZE, typename TBase = TEmpty> -        struct TMsgBunch: public TBase { -            ui64 FirstSlot; - -            TSlot<TAux> LinkArray[BUNCH_SIZE]; - -            TMsgBunch* volatile NextBunch; -            TMsgBunch* volatile BackLink; - -            ui64 volatile Token; -            TMsgBunch* volatile NextToken; - -            /* this push can return PUSH_RESULT_BLOCKED */ -            inline TPushResult Push(TMsgLink msg, ui64 slot, TAux auxiliary) { -                if (Y_UNLIKELY(slot < FirstSlot)) { -                    return PUSH_RESULT_BACKWARD; -                } - -                if (Y_UNLIKELY(slot >= FirstSlot + BUNCH_SIZE)) { -                    return PUSH_RESULT_FORWARD; -                } - -                LinkArray[slot - FirstSlot].Store(auxiliary); - -                auto oldValue = AtomicSwap(&LinkArray[slot - FirstSlot].Msg, msg); - -                if (Y_LIKELY(oldValue == nullptr)) { -                    return PUSH_RESULT_OK; -                } else { -                    LeaveBlocked(oldValue); -                    return PUSH_RESULT_BLOCKED; -                } -            } - -            inline bool IsSlotHere(ui64 slot) { -                return slot < FirstSlot + BUNCH_SIZE; -            } - -            inline TMsgLink GetSlot(ui64 slot) const { -                return AtomicGet(LinkArray[slot - FirstSlot].Msg); -            } - -            inline TSlot<TAux> GetSlotAux(ui64 slot) const { -             
   auto msg = GetSlot(slot); -                auto aux = LinkArray[slot - FirstSlot].Retrieve(); -                return TSlot<TAux>::Pair(msg, aux); -            } - -            void LeaveBlocked(ui64 slot) { -                auto token = GetToken(slot); -                token->DecrementToken(); -            } - -            void LeaveBlocked(TMsgLink msg) { -                auto token = reinterpret_cast<TMsgBunch*>(msg); -                token->DecrementToken(); -            } - -            TSlot<TAux> BlockSlotAux(ui64 slot, TMsgBunch* token) { -                auto old = -                    AtomicSwap(&LinkArray[slot - FirstSlot].Msg, (TMsgLink)token); -                if (old == nullptr) { -                    // It's valid to increment after AtomicCas -                    // because token will release data only after SetNextToken -                    token->IncrementToken(); -                    return TSlot<TAux>::NullElem(); -                } -                return TSlot<TAux>::Pair(old, LinkArray[slot - FirstSlot].Retrieve()); -            } - -            inline TMsgBunch* GetNextBunch() const { -                return AtomicGet(NextBunch); -            } - -            inline bool SetNextBunch(TMsgBunch* ptr) { -                return AtomicCas(&NextBunch, ptr, nullptr); -            } - -            inline TMsgBunch* GetBackLink() const { -                return AtomicGet(BackLink); -            } - -            inline TMsgBunch* GetToken(ui64 slot) { -                return reinterpret_cast<TMsgBunch*>(LinkArray[slot - FirstSlot].Msg); -            } - -            inline void IncrementToken() { -                AtomicIncrement(Token); -            } - -            // the object could be destroyed after this method -            inline void DecrementToken() { -                if (Y_UNLIKELY(AtomicDecrement(Token) == BUNCH_SIZE)) { -                    Release(this); -                    AtomicGet(NextToken)->DecrementToken(); -                    
// this could be invalid here -                } -            } - -            // the object could be destroyed after this method -            inline void SetNextToken(TMsgBunch* next) { -                AtomicSet(NextToken, next); -                if (Y_UNLIKELY(AtomicAdd(Token, BUNCH_SIZE) == BUNCH_SIZE)) { -                    Release(this); -                    next->DecrementToken(); -                } -                // this could be invalid here -            } - -            TMsgBunch(ui64 start, TMsgBunch* backLink) { -                AtomicSet(FirstSlot, start); -                memset(&LinkArray, 0, sizeof(LinkArray)); -                AtomicSet(NextBunch, nullptr); -                AtomicSet(BackLink, backLink); - -                AtomicSet(Token, 1); -                AtomicSet(NextToken, nullptr); -            } - -            static void Release(TMsgBunch* bunch) { -                auto backLink = AtomicGet(bunch->BackLink); -                if (backLink == nullptr) { -                    return; -                } -                AtomicSet(bunch->BackLink, nullptr); - -                do { -                    auto bbackLink = backLink->BackLink; -                    delete backLink; -                    backLink = bbackLink; -                } while (backLink != nullptr); -            } - -            void Destroy() { -                for (auto tail = BackLink; tail != nullptr;) { -                    auto next = tail->BackLink; -                    delete tail; -                    tail = next; -                } - -                for (auto next = this; next != nullptr;) { -                    auto nnext = next->NextBunch; -                    delete next; -                    next = nnext; -                } -            } -        }; - -        template <typename TAux, ui32 BUNCH_SIZE, typename TBunchBase = TEmpty> -        class TWriteBucket { -        public: -            static const ui64 GROSS_SIZE; - -            using TBunch = 
TMsgBunch<TAux, BUNCH_SIZE, TBunchBase>; - -            TWriteBucket(TBunch* bunch = new TBunch(0, nullptr)) -                : LastBunch(bunch) -                , SlotCounter(0) -            { -            } - -            TWriteBucket(TWriteBucket&& move) -                : LastBunch(move.LastBunch) -                , SlotCounter(move.SlotCounter) -            { -                move.LastBunch = nullptr; -            } - -            ~TWriteBucket() { -                if (LastBunch != nullptr) { -                    LastBunch->Destroy(); -                } -            } - -            inline bool Push(TMsgLink msg, TAux aux) { -                ui64 pushSlot = AtomicGetAndIncrement(SlotCounter); -                TBunch* hintBunch = GetLastBunch(); - -                for (;;) { -                    auto hint = hintBunch->Push(msg, pushSlot, aux); -                    if (Y_LIKELY(hint == PUSH_RESULT_OK)) { -                        return true; -                    } -                    bool hhResult = HandleHint(hintBunch, hint); -                    if (Y_UNLIKELY(!hhResult)) { -                        return false; -                    } -                } -            } - -        protected: -            template <typename, ui32, typename> -            friend class TReadBucket; - -            TBunch* volatile LastBunch; // Hint -            volatile ui64 SlotCounter; - -            inline TBunch* GetLastBunch() const { -                return AtomicGet(LastBunch); -            } - -            bool HandleHint(TBunch*& hintBunch, TPushResult hint) { -                if (Y_UNLIKELY(hint == PUSH_RESULT_BLOCKED)) { -                    return false; -                } - -                if (Y_UNLIKELY(hint == PUSH_RESULT_BACKWARD)) { -                    hintBunch = hintBunch->GetBackLink(); -                    return true; -                } - -                // PUSH_RESULT_FORWARD -                auto nextBunch = hintBunch->GetNextBunch(); - -                
if (nextBunch == nullptr) { -                    auto first = hintBunch->FirstSlot + BUNCH_SIZE; -                    nextBunch = new TBunch(first, hintBunch); -                    if (Y_UNLIKELY(!hintBunch->SetNextBunch(nextBunch))) { -                        delete nextBunch; -                        nextBunch = hintBunch->GetNextBunch(); -                    } -                } - -                // hintBunch could not be freed here so it cannot be reused -                // it's alright if this CAS was not succeeded, -                // it means that other thread did that recently -                AtomicCas(&LastBunch, nextBunch, hintBunch); - -                hintBunch = nextBunch; -                return true; -            } -        }; - -        template <typename TAux, ui32 BUNCH_SIZE, typename TBunchBase> -        class TReadBucket { -        public: -            static constexpr int MAX_NUMBER_OF_TRIES_TO_READ = 20; - -            using TWBucket = TWriteBucket<TAux, BUNCH_SIZE, TBunchBase>; -            using TBunch = TMsgBunch<TAux, BUNCH_SIZE, TBunchBase>; - -            TReadBucket(TWBucket* writer) -                : Writer(writer) -                , ReadBunch(writer->GetLastBunch()) -                , LastKnownPushBunch(writer->GetLastBunch()) -            { -                ReadBunch->DecrementToken(); // no previous token -            } - -            TReadBucket(TReadBucket toCopy, TWBucket* writer) -                : TReadBucket(std::move(toCopy)) -            { -                Writer = writer; -            } - -            ui64 ReadyCount() const { -                return AtomicGet(Writer->SlotCounter) - ReadSlot; -            } - -            inline TMsgLink Pop() { -                return PopAux().Msg; -            } - -            inline TSlot<TAux> PopAux() { -                for (;;) { -                    if (Y_UNLIKELY(ReadSlot == LastKnownPushSlot)) { -                        if (Y_LIKELY(!RereadPushSlot())) { -                        
    return TSlot<TAux>::NullElem(); -                        } -                    } - -                    if (Y_UNLIKELY(!ReadBunch->IsSlotHere(ReadSlot))) { -                        if (Y_UNLIKELY(!SwitchToNextBunch())) { -                            return TSlot<TAux>::NullElem(); -                        } -                    } - -                    auto result = ReadBunch->GetSlotAux(ReadSlot); -                    if (Y_LIKELY(result.Msg != nullptr)) { -                        ++ReadSlot; -                        return result; -                    } - -                    if (ReadSlot + 1 == AtomicGet(Writer->SlotCounter)) { -                        return TSlot<TAux>::NullElem(); -                    } - -                    result = StubbornPopAux(); - -                    if (result.Msg != nullptr) { -                        return result; -                    } -                } -            } - -        private: -            TWBucket* Writer; -            TBunch* ReadBunch; -            ui64 ReadSlot = 0; -            TBunch* LastKnownPushBunch; -            ui64 LastKnownPushSlot = 0; - -            // MUST BE: ReadSlot == LastKnownPushSlot -            bool RereadPushSlot() { -                auto oldSlot = LastKnownPushSlot; - -                auto currentPushBunch = Writer->GetLastBunch(); -                auto currentPushSlot = AtomicGet(Writer->SlotCounter); - -                if (currentPushBunch != LastKnownPushBunch) { -                    // LastKnownPushBunch could be invalid after this line -                    LastKnownPushBunch->SetNextToken(currentPushBunch); -                } - -                LastKnownPushBunch = currentPushBunch; -                LastKnownPushSlot = currentPushSlot; - -                return oldSlot != LastKnownPushSlot; -            } - -            bool SwitchToNextBunch() { -                for (int q = 0; q < MAX_NUMBER_OF_TRIES_TO_READ; ++q) { -                    auto next = ReadBunch->GetNextBunch(); -    
                if (next != nullptr) { -                        ReadBunch = next; -                        return true; -                    } -                    SpinLockPause(); -                } -                return false; -            } - -            TSlot<TAux> StubbornPopAux() { -                for (int q = 0; q < MAX_NUMBER_OF_TRIES_TO_READ; ++q) { -                    auto result = ReadBunch->GetSlotAux(ReadSlot); -                    if (Y_LIKELY(result.Msg != nullptr)) { -                        ++ReadSlot; -                        return result; -                    } -                    SpinLockPause(); -                } - -                return ReadBunch->BlockSlotAux(ReadSlot++, LastKnownPushBunch); -            } -        }; - -        struct TDefaultParams { -            static constexpr bool DeleteItems = true; -            using TAux = NObstructiveQueuePrivate::TEmptyAux; -            using TBunchBase = NObstructiveQueuePrivate::TEmpty; -            static constexpr ui32 BUNCH_SIZE = 251; -        }; - -    } //namespace NObstructiveQueuePrivate - -    DeclareTuneValueParam(TObstructiveQueueBunchSize, ui32, BUNCH_SIZE); -    DeclareTuneValueParam(TObstructiveQueueDeleteItems, bool, DeleteItems); -    DeclareTuneTypeParam(TObstructiveQueueBunchBase, TBunchBase); -    DeclareTuneTypeParam(TObstructiveQueueAux, TAux); - +#pragma once  +  +/*  +  Semi-wait-free queue, multiple producers - one consumer. Strict order.  +  The queue algorithm is using concept of virtual infinite array.  +  +  A producer takes a number from a counter and atomicaly increments the counter.  +  The number taken is a number of a slot for the producer to put a new message  +  into infinite array.  +  +  Then producer constructs a virtual infinite array by bidirectional linked list  +  of blocks. Each block contains several slots.  +  +  There is a hint pointer which optimisticly points to the last block  +  of the list and never goes backward.  
+  +  Consumer exploits the property of the hint pointer always going forward  +  to free old blocks eventually. Consumer periodically read the hint pointer  +  and the counter and thus deduce producers which potentially holds the pointer  +  to a block. Consumer can free the block if all that producers filled their  +  slots and left the queue.  +  +  No producer can stop the progress for other producers.  +  +  Consumer can obstruct a slot of a delayed producer by putting special mark.  +  Thus no producer can stop the progress for consumer.  +  But a slow producer may be forced to retry unlimited number of times.  +  Though it's very unlikely for a non-preempted producer to be obstructed.  +  That's why the algorithm is semi-wait-free.  +  +  WARNING: there is no wait¬ify mechanic for consumer,  +  consumer receives nullptr if queue was empty.  +  +  WARNING: though the algorithm itself is lock-free  +  but producers and consumer could be blocked by memory allocator  +  +  WARNING: copy constructers of the queue are not thread-safe  + */  +  +#include <util/generic/noncopyable.h>  +#include <util/generic/ptr.h>  +#include <util/system/atomic.h>  +#include <util/system/spinlock.h>  +  +#include "tune.h"  +  +namespace NThreading {  +    namespace NObstructiveQueuePrivate {  +        typedef void* TMsgLink;  +  +        struct TEmpty {  +        };  +  +        struct TEmptyAux {  +            TEmptyAux Retrieve() const {  +                return TEmptyAux();  +            }  +            void Store(TEmptyAux&) {  +            }  +            static constexpr TEmptyAux Zero() {  +                return TEmptyAux();  +            }  +        };  +  +        template <typename TAux>  +        struct TSlot {  +            TMsgLink volatile Msg;  +            TAux AuxiliaryData;  +  +            inline void Store(TAux& aux) {  +                AuxiliaryData.Store(aux);  +            }  +  +            inline TAux Retrieve() const {  +                return 
AuxiliaryData.Retrieve();  +            }  +  +            static TSlot<TAux> NullElem() {  +                return {nullptr, TAux::Zero()};  +            }  +  +            static TSlot<TAux> Pair(TMsgLink msg, TAux aux) {  +                return {msg, std::move(aux)};  +            }  +        };  +  +        template <>  +        struct TSlot<TEmptyAux> {  +            TMsgLink volatile Msg;  +            inline void Store(TEmptyAux&) {  +            }  +            inline TEmptyAux Retrieve() const {  +                return TEmptyAux();  +            }  +  +            static TSlot<TEmptyAux> NullElem() {  +                return {nullptr};  +            }  +  +            static TSlot<TEmptyAux> Pair(TMsgLink msg, TEmptyAux) {  +                return {msg};  +            }  +        };  +  +        enum TPushResult {  +            PUSH_RESULT_OK,  +            PUSH_RESULT_BACKWARD,  +            PUSH_RESULT_FORWARD,  +            PUSH_RESULT_BLOCKED,  +        };  +  +        template <typename TAux, ui32 BUNCH_SIZE, typename TBase = TEmpty>  +        struct TMsgBunch: public TBase {  +            ui64 FirstSlot;  +  +            TSlot<TAux> LinkArray[BUNCH_SIZE];  +  +            TMsgBunch* volatile NextBunch;  +            TMsgBunch* volatile BackLink;  +  +            ui64 volatile Token;  +            TMsgBunch* volatile NextToken;  +  +            /* this push can return PUSH_RESULT_BLOCKED */  +            inline TPushResult Push(TMsgLink msg, ui64 slot, TAux auxiliary) {  +                if (Y_UNLIKELY(slot < FirstSlot)) {  +                    return PUSH_RESULT_BACKWARD;  +                }  +  +                if (Y_UNLIKELY(slot >= FirstSlot + BUNCH_SIZE)) {  +                    return PUSH_RESULT_FORWARD;  +                }  +  +                LinkArray[slot - FirstSlot].Store(auxiliary);  +  +                auto oldValue = AtomicSwap(&LinkArray[slot - FirstSlot].Msg, msg);  +  +                if (Y_LIKELY(oldValue == nullptr)) {  +        
            return PUSH_RESULT_OK;  +                } else {  +                    LeaveBlocked(oldValue);  +                    return PUSH_RESULT_BLOCKED;  +                }  +            }  +  +            inline bool IsSlotHere(ui64 slot) {  +                return slot < FirstSlot + BUNCH_SIZE;  +            }  +  +            inline TMsgLink GetSlot(ui64 slot) const {  +                return AtomicGet(LinkArray[slot - FirstSlot].Msg);  +            }  +  +            inline TSlot<TAux> GetSlotAux(ui64 slot) const {  +                auto msg = GetSlot(slot);  +                auto aux = LinkArray[slot - FirstSlot].Retrieve();  +                return TSlot<TAux>::Pair(msg, aux);  +            }  +  +            void LeaveBlocked(ui64 slot) {  +                auto token = GetToken(slot);  +                token->DecrementToken();  +            }  +  +            void LeaveBlocked(TMsgLink msg) {  +                auto token = reinterpret_cast<TMsgBunch*>(msg);  +                token->DecrementToken();  +            }  +  +            TSlot<TAux> BlockSlotAux(ui64 slot, TMsgBunch* token) {  +                auto old =  +                    AtomicSwap(&LinkArray[slot - FirstSlot].Msg, (TMsgLink)token);  +                if (old == nullptr) {  +                    // It's valid to increment after AtomicCas  +                    // because token will release data only after SetNextToken  +                    token->IncrementToken();  +                    return TSlot<TAux>::NullElem();  +                }  +                return TSlot<TAux>::Pair(old, LinkArray[slot - FirstSlot].Retrieve());  +            }  +  +            inline TMsgBunch* GetNextBunch() const {  +                return AtomicGet(NextBunch);  +            }  +  +            inline bool SetNextBunch(TMsgBunch* ptr) {  +                return AtomicCas(&NextBunch, ptr, nullptr);  +            }  +  +            inline TMsgBunch* GetBackLink() const {  +                return 
AtomicGet(BackLink);  +            }  +  +            inline TMsgBunch* GetToken(ui64 slot) {  +                return reinterpret_cast<TMsgBunch*>(LinkArray[slot - FirstSlot].Msg);  +            }  +  +            inline void IncrementToken() {  +                AtomicIncrement(Token);  +            }  +  +            // the object could be destroyed after this method  +            inline void DecrementToken() {  +                if (Y_UNLIKELY(AtomicDecrement(Token) == BUNCH_SIZE)) {  +                    Release(this);  +                    AtomicGet(NextToken)->DecrementToken();  +                    // this could be invalid here  +                }  +            }  +  +            // the object could be destroyed after this method  +            inline void SetNextToken(TMsgBunch* next) {  +                AtomicSet(NextToken, next);  +                if (Y_UNLIKELY(AtomicAdd(Token, BUNCH_SIZE) == BUNCH_SIZE)) {  +                    Release(this);  +                    next->DecrementToken();  +                }  +                // this could be invalid here  +            }  +  +            TMsgBunch(ui64 start, TMsgBunch* backLink) {  +                AtomicSet(FirstSlot, start);  +                memset(&LinkArray, 0, sizeof(LinkArray));  +                AtomicSet(NextBunch, nullptr);  +                AtomicSet(BackLink, backLink);  +  +                AtomicSet(Token, 1);  +                AtomicSet(NextToken, nullptr);  +            }  +  +            static void Release(TMsgBunch* bunch) {  +                auto backLink = AtomicGet(bunch->BackLink);  +                if (backLink == nullptr) {  +                    return;  +                }  +                AtomicSet(bunch->BackLink, nullptr);  +  +                do {  +                    auto bbackLink = backLink->BackLink;  +                    delete backLink;  +                    backLink = bbackLink;  +                } while (backLink != nullptr);  +            }  +  +            void 
Destroy() {  +                for (auto tail = BackLink; tail != nullptr;) {  +                    auto next = tail->BackLink;  +                    delete tail;  +                    tail = next;  +                }  +  +                for (auto next = this; next != nullptr;) {  +                    auto nnext = next->NextBunch;  +                    delete next;  +                    next = nnext;  +                }  +            }  +        };  +  +        template <typename TAux, ui32 BUNCH_SIZE, typename TBunchBase = TEmpty>  +        class TWriteBucket {  +        public:  +            static const ui64 GROSS_SIZE;  +  +            using TBunch = TMsgBunch<TAux, BUNCH_SIZE, TBunchBase>;  +  +            TWriteBucket(TBunch* bunch = new TBunch(0, nullptr))  +                : LastBunch(bunch)  +                , SlotCounter(0)  +            {  +            }  +  +            TWriteBucket(TWriteBucket&& move)  +                : LastBunch(move.LastBunch)  +                , SlotCounter(move.SlotCounter)  +            {  +                move.LastBunch = nullptr;  +            }  +  +            ~TWriteBucket() {  +                if (LastBunch != nullptr) {  +                    LastBunch->Destroy();  +                }  +            }  +  +            inline bool Push(TMsgLink msg, TAux aux) {  +                ui64 pushSlot = AtomicGetAndIncrement(SlotCounter);  +                TBunch* hintBunch = GetLastBunch();  +  +                for (;;) {  +                    auto hint = hintBunch->Push(msg, pushSlot, aux);  +                    if (Y_LIKELY(hint == PUSH_RESULT_OK)) {  +                        return true;  +                    }  +                    bool hhResult = HandleHint(hintBunch, hint);  +                    if (Y_UNLIKELY(!hhResult)) {  +                        return false;  +                    }  +                }  +            }  +  +        protected:  +            template <typename, ui32, typename>  +            friend class 
TReadBucket;  +  +            TBunch* volatile LastBunch; // Hint  +            volatile ui64 SlotCounter;  +  +            inline TBunch* GetLastBunch() const {  +                return AtomicGet(LastBunch);  +            }  +  +            bool HandleHint(TBunch*& hintBunch, TPushResult hint) {  +                if (Y_UNLIKELY(hint == PUSH_RESULT_BLOCKED)) {  +                    return false;  +                }  +  +                if (Y_UNLIKELY(hint == PUSH_RESULT_BACKWARD)) {  +                    hintBunch = hintBunch->GetBackLink();  +                    return true;  +                }  +  +                // PUSH_RESULT_FORWARD  +                auto nextBunch = hintBunch->GetNextBunch();  +  +                if (nextBunch == nullptr) {  +                    auto first = hintBunch->FirstSlot + BUNCH_SIZE;  +                    nextBunch = new TBunch(first, hintBunch);  +                    if (Y_UNLIKELY(!hintBunch->SetNextBunch(nextBunch))) {  +                        delete nextBunch;  +                        nextBunch = hintBunch->GetNextBunch();  +                    }  +                }  +  +                // hintBunch could not be freed here so it cannot be reused  +                // it's alright if this CAS was not succeeded,  +                // it means that other thread did that recently  +                AtomicCas(&LastBunch, nextBunch, hintBunch);  +  +                hintBunch = nextBunch;  +                return true;  +            }  +        };  +  +        template <typename TAux, ui32 BUNCH_SIZE, typename TBunchBase>  +        class TReadBucket {  +        public:  +            static constexpr int MAX_NUMBER_OF_TRIES_TO_READ = 20;  +  +            using TWBucket = TWriteBucket<TAux, BUNCH_SIZE, TBunchBase>;  +            using TBunch = TMsgBunch<TAux, BUNCH_SIZE, TBunchBase>;  +  +            TReadBucket(TWBucket* writer)  +                : Writer(writer)  +                , ReadBunch(writer->GetLastBunch())  +                , 
LastKnownPushBunch(writer->GetLastBunch())  +            {  +                ReadBunch->DecrementToken(); // no previous token  +            }  +  +            TReadBucket(TReadBucket toCopy, TWBucket* writer)  +                : TReadBucket(std::move(toCopy))  +            {  +                Writer = writer;  +            }  +  +            ui64 ReadyCount() const {  +                return AtomicGet(Writer->SlotCounter) - ReadSlot;  +            }  +  +            inline TMsgLink Pop() {  +                return PopAux().Msg;  +            }  +  +            inline TSlot<TAux> PopAux() {  +                for (;;) {  +                    if (Y_UNLIKELY(ReadSlot == LastKnownPushSlot)) {  +                        if (Y_LIKELY(!RereadPushSlot())) {  +                            return TSlot<TAux>::NullElem();  +                        }  +                    }  +  +                    if (Y_UNLIKELY(!ReadBunch->IsSlotHere(ReadSlot))) {  +                        if (Y_UNLIKELY(!SwitchToNextBunch())) {  +                            return TSlot<TAux>::NullElem();  +                        }  +                    }  +  +                    auto result = ReadBunch->GetSlotAux(ReadSlot);  +                    if (Y_LIKELY(result.Msg != nullptr)) {  +                        ++ReadSlot;  +                        return result;  +                    }  +  +                    if (ReadSlot + 1 == AtomicGet(Writer->SlotCounter)) {  +                        return TSlot<TAux>::NullElem();  +                    }  +  +                    result = StubbornPopAux();  +  +                    if (result.Msg != nullptr) {  +                        return result;  +                    }  +                }  +            }  +  +        private:  +            TWBucket* Writer;  +            TBunch* ReadBunch;  +            ui64 ReadSlot = 0;  +            TBunch* LastKnownPushBunch;  +            ui64 LastKnownPushSlot = 0;  +  +            // MUST BE: ReadSlot == LastKnownPushSlot  + 
           bool RereadPushSlot() {  +                auto oldSlot = LastKnownPushSlot;  +  +                auto currentPushBunch = Writer->GetLastBunch();  +                auto currentPushSlot = AtomicGet(Writer->SlotCounter);  +  +                if (currentPushBunch != LastKnownPushBunch) {  +                    // LastKnownPushBunch could be invalid after this line  +                    LastKnownPushBunch->SetNextToken(currentPushBunch);  +                }  +  +                LastKnownPushBunch = currentPushBunch;  +                LastKnownPushSlot = currentPushSlot;  +  +                return oldSlot != LastKnownPushSlot;  +            }  +  +            bool SwitchToNextBunch() {  +                for (int q = 0; q < MAX_NUMBER_OF_TRIES_TO_READ; ++q) {  +                    auto next = ReadBunch->GetNextBunch();  +                    if (next != nullptr) {  +                        ReadBunch = next;  +                        return true;  +                    }  +                    SpinLockPause();  +                }  +                return false;  +            }  +  +            TSlot<TAux> StubbornPopAux() {  +                for (int q = 0; q < MAX_NUMBER_OF_TRIES_TO_READ; ++q) {  +                    auto result = ReadBunch->GetSlotAux(ReadSlot);  +                    if (Y_LIKELY(result.Msg != nullptr)) {  +                        ++ReadSlot;  +                        return result;  +                    }  +                    SpinLockPause();  +                }  +  +                return ReadBunch->BlockSlotAux(ReadSlot++, LastKnownPushBunch);  +            }  +        };  +  +        struct TDefaultParams {  +            static constexpr bool DeleteItems = true;  +            using TAux = NObstructiveQueuePrivate::TEmptyAux;  +            using TBunchBase = NObstructiveQueuePrivate::TEmpty;  +            static constexpr ui32 BUNCH_SIZE = 251;  +        };  +  +    } //namespace NObstructiveQueuePrivate  +  +    
DeclareTuneValueParam(TObstructiveQueueBunchSize, ui32, BUNCH_SIZE);  +    DeclareTuneValueParam(TObstructiveQueueDeleteItems, bool, DeleteItems);  +    DeclareTuneTypeParam(TObstructiveQueueBunchBase, TBunchBase);  +    DeclareTuneTypeParam(TObstructiveQueueAux, TAux);  +       template <typename TItem = void, typename... TParams> -    class TObstructiveConsumerAuxQueue { -    private: -        using TTuned = -            TTune<NObstructiveQueuePrivate::TDefaultParams, TParams...>; - -        using TAux = typename TTuned::TAux; -        using TSlot = NObstructiveQueuePrivate::TSlot<TAux>; -        using TMsgLink = NObstructiveQueuePrivate::TMsgLink; -        using TBunchBase = typename TTuned::TBunchBase; -        static constexpr bool DeleteItems = TTuned::DeleteItems; -        static constexpr ui32 BUNCH_SIZE = TTuned::BUNCH_SIZE; - -    public: -        TObstructiveConsumerAuxQueue() -            : RBuckets(&WBucket) -        { -        } - -        ~TObstructiveConsumerAuxQueue() { -            if (DeleteItems) { -                for (;;) { -                    auto msg = Pop(); -                    if (msg == nullptr) { -                        break; -                    } -                    TDelete::Destroy(msg); -                } -            } -        } - -        void Push(TItem* msg) { -            while (!WBucket.Push(reinterpret_cast<TMsgLink>(msg), TAux())) { -            } -        } - -        TItem* Pop() { -            return reinterpret_cast<TItem*>(RBuckets.Pop()); -        } - -        TSlot PopAux() { -            return RBuckets.PopAux(); -        } - -    private: -        NObstructiveQueuePrivate::TWriteBucket<TAux, BUNCH_SIZE, TBunchBase> -            WBucket; -        NObstructiveQueuePrivate::TReadBucket<TAux, BUNCH_SIZE, TBunchBase> -            RBuckets; -    }; - -    template <typename TItem = void, bool DeleteItems = true> -    class TObstructiveConsumerQueue +    class TObstructiveConsumerAuxQueue {  +    private:  +        
using TTuned =  +            TTune<NObstructiveQueuePrivate::TDefaultParams, TParams...>;  +  +        using TAux = typename TTuned::TAux;  +        using TSlot = NObstructiveQueuePrivate::TSlot<TAux>;  +        using TMsgLink = NObstructiveQueuePrivate::TMsgLink;  +        using TBunchBase = typename TTuned::TBunchBase;  +        static constexpr bool DeleteItems = TTuned::DeleteItems;  +        static constexpr ui32 BUNCH_SIZE = TTuned::BUNCH_SIZE;  +  +    public:  +        TObstructiveConsumerAuxQueue()  +            : RBuckets(&WBucket)  +        {  +        }  +  +        ~TObstructiveConsumerAuxQueue() {  +            if (DeleteItems) {  +                for (;;) {  +                    auto msg = Pop();  +                    if (msg == nullptr) {  +                        break;  +                    }  +                    TDelete::Destroy(msg);  +                }  +            }  +        }  +  +        void Push(TItem* msg) {  +            while (!WBucket.Push(reinterpret_cast<TMsgLink>(msg), TAux())) {  +            }  +        }  +  +        TItem* Pop() {  +            return reinterpret_cast<TItem*>(RBuckets.Pop());  +        }  +  +        TSlot PopAux() {  +            return RBuckets.PopAux();  +        }  +  +    private:  +        NObstructiveQueuePrivate::TWriteBucket<TAux, BUNCH_SIZE, TBunchBase>  +            WBucket;  +        NObstructiveQueuePrivate::TReadBucket<TAux, BUNCH_SIZE, TBunchBase>  +            RBuckets;  +    };  +  +    template <typename TItem = void, bool DeleteItems = true>  +    class TObstructiveConsumerQueue          : public TObstructiveConsumerAuxQueue<TItem,                                                TObstructiveQueueDeleteItems<DeleteItems>> { -    }; +    };   } diff --git a/library/cpp/threading/queue/queue_ut.cpp b/library/cpp/threading/queue/queue_ut.cpp index 80eca147da9..8b36437034c 100644 --- a/library/cpp/threading/queue/queue_ut.cpp +++ b/library/cpp/threading/queue/queue_ut.cpp @@ -1,242 +1,242 @@  
#include <library/cpp/testing/unittest/registar.h> -#include <util/system/thread.h> - -#include "ut_helpers.h" - -typedef void* TMsgLink; - +#include <util/system/thread.h>  +  +#include "ut_helpers.h"  +  +typedef void* TMsgLink;  +   template <typename TQueueType> -class TQueueTestProcs: public TTestBase { -private: +class TQueueTestProcs: public TTestBase {  +private:       UNIT_TEST_SUITE_DEMANGLE(TQueueTestProcs<TQueueType>); -    UNIT_TEST(Threads2_Push1M_Threads1_Pop2M) -    UNIT_TEST(Threads4_Push1M_Threads1_Pop4M) -    UNIT_TEST(Threads8_RndPush100K_Threads8_Queues) +    UNIT_TEST(Threads2_Push1M_Threads1_Pop2M)  +    UNIT_TEST(Threads4_Push1M_Threads1_Pop4M)  +    UNIT_TEST(Threads8_RndPush100K_Threads8_Queues)       /* -    UNIT_TEST(Threads24_RndPush100K_Threads24_Queues) -    UNIT_TEST(Threads24_RndPush100K_Threads8_Queues) -    UNIT_TEST(Threads24_RndPush100K_Threads4_Queues) -*/ -    UNIT_TEST_SUITE_END(); - -public: -    void Push1M_Pop1M() { +    UNIT_TEST(Threads24_RndPush100K_Threads24_Queues)  +    UNIT_TEST(Threads24_RndPush100K_Threads8_Queues)  +    UNIT_TEST(Threads24_RndPush100K_Threads4_Queues)  +*/  +    UNIT_TEST_SUITE_END();  +  +public:  +    void Push1M_Pop1M() {           TQueueType queue; -        TMsgLink msg = &queue; - -        auto pmsg = queue.Pop(); -        UNIT_ASSERT_VALUES_EQUAL(pmsg, nullptr); - -        for (int i = 0; i < 1000000; ++i) { -            queue.Push((char*)msg + i); -        } - -        for (int i = 0; i < 1000000; ++i) { -            auto popped = queue.Pop(); -            UNIT_ASSERT_EQUAL((char*)msg + i, popped); -        } - -        pmsg = queue.Pop(); -        UNIT_ASSERT_VALUES_EQUAL(pmsg, nullptr); -    } - -    void Threads2_Push1M_Threads1_Pop2M() { +        TMsgLink msg = &queue;  +  +        auto pmsg = queue.Pop();  +        UNIT_ASSERT_VALUES_EQUAL(pmsg, nullptr);  +  +        for (int i = 0; i < 1000000; ++i) {  +            queue.Push((char*)msg + i);  +        }  +  +        for (int i = 0; 
i < 1000000; ++i) {  +            auto popped = queue.Pop();  +            UNIT_ASSERT_EQUAL((char*)msg + i, popped);  +        }  +  +        pmsg = queue.Pop();  +        UNIT_ASSERT_VALUES_EQUAL(pmsg, nullptr);  +    }  +  +    void Threads2_Push1M_Threads1_Pop2M() {           TQueueType queue; - +           class TPusherThread: public ISimpleThread { -        public: +        public:               TPusherThread(TQueueType& theQueue, char* start)                  : Queue(theQueue) -                , Arg(start) -            { -            } - +                , Arg(start)  +            {  +            }  +               TQueueType& Queue; -            char* Arg; - -            void* ThreadProc() override { -                for (int i = 0; i < 1000000; ++i) { -                    Queue.Push(Arg + i); -                } -                return nullptr; -            } -        }; - -        TPusherThread pusher1(queue, (char*)&queue); -        TPusherThread pusher2(queue, (char*)&queue + 2000000); - -        pusher1.Start(); -        pusher2.Start(); - -        for (int i = 0; i < 2000000; ++i) { -            while (queue.Pop() == nullptr) { -                SpinLockPause(); -            } -        } - -        auto pmsg = queue.Pop(); -        UNIT_ASSERT_VALUES_EQUAL(pmsg, nullptr); -    } - -    void Threads4_Push1M_Threads1_Pop4M() { +            char* Arg;  +  +            void* ThreadProc() override {  +                for (int i = 0; i < 1000000; ++i) {  +                    Queue.Push(Arg + i);  +                }  +                return nullptr;  +            }  +        };  +  +        TPusherThread pusher1(queue, (char*)&queue);  +        TPusherThread pusher2(queue, (char*)&queue + 2000000);  +  +        pusher1.Start();  +        pusher2.Start();  +  +        for (int i = 0; i < 2000000; ++i) {  +            while (queue.Pop() == nullptr) {  +                SpinLockPause();  +            }  +        }  +  +        auto pmsg = queue.Pop();  +        
UNIT_ASSERT_VALUES_EQUAL(pmsg, nullptr);  +    }  +  +    void Threads4_Push1M_Threads1_Pop4M() {           TQueueType queue; - +           class TPusherThread: public ISimpleThread { -        public: +        public:               TPusherThread(TQueueType& theQueue, char* start)                  : Queue(theQueue) -                , Arg(start) -            { -            } - +                , Arg(start)  +            {  +            }  +               TQueueType& Queue; -            char* Arg; - -            void* ThreadProc() override { -                for (int i = 0; i < 1000000; ++i) { -                    Queue.Push(Arg + i); -                } -                return nullptr; -            } -        }; - -        TPusherThread pusher1(queue, (char*)&queue); -        TPusherThread pusher2(queue, (char*)&queue + 2000000); -        TPusherThread pusher3(queue, (char*)&queue + 4000000); -        TPusherThread pusher4(queue, (char*)&queue + 6000000); - -        pusher1.Start(); -        pusher2.Start(); -        pusher3.Start(); -        pusher4.Start(); - -        for (int i = 0; i < 4000000; ++i) { -            while (queue.Pop() == nullptr) { -                SpinLockPause(); -            } -        } - -        auto pmsg = queue.Pop(); -        UNIT_ASSERT_VALUES_EQUAL(pmsg, nullptr); -    } - -    template <size_t NUMBER_OF_PUSHERS, size_t NUMBER_OF_QUEUES> -    void ManyRndPush100K_ManyQueues() { +            char* Arg;  +  +            void* ThreadProc() override {  +                for (int i = 0; i < 1000000; ++i) {  +                    Queue.Push(Arg + i);  +                }  +                return nullptr;  +            }  +        };  +  +        TPusherThread pusher1(queue, (char*)&queue);  +        TPusherThread pusher2(queue, (char*)&queue + 2000000);  +        TPusherThread pusher3(queue, (char*)&queue + 4000000);  +        TPusherThread pusher4(queue, (char*)&queue + 6000000);  +  +        pusher1.Start();  +        pusher2.Start();  +        
pusher3.Start();  +        pusher4.Start();  +  +        for (int i = 0; i < 4000000; ++i) {  +            while (queue.Pop() == nullptr) {  +                SpinLockPause();  +            }  +        }  +  +        auto pmsg = queue.Pop();  +        UNIT_ASSERT_VALUES_EQUAL(pmsg, nullptr);  +    }  +  +    template <size_t NUMBER_OF_PUSHERS, size_t NUMBER_OF_QUEUES>  +    void ManyRndPush100K_ManyQueues() {           TQueueType queue[NUMBER_OF_QUEUES]; - +           class TPusherThread: public ISimpleThread { -        public: +        public:               TPusherThread(TQueueType* queues, char* start) -                : Queues(queues) -                , Arg(start) -            { -            } - +                : Queues(queues)  +                , Arg(start)  +            {  +            }  +               TQueueType* Queues; -            char* Arg; - -            void* ThreadProc() override { -                ui64 counters[NUMBER_OF_QUEUES]; -                for (size_t i = 0; i < NUMBER_OF_QUEUES; ++i) { -                    counters[i] = 0; -                } - -                for (int i = 0; i < 100000; ++i) { -                    size_t rnd = GetCycleCount() % NUMBER_OF_QUEUES; -                    int cookie = counters[rnd]++; -                    Queues[rnd].Push(Arg + cookie); -                } - -                for (size_t i = 0; i < NUMBER_OF_QUEUES; ++i) { -                    Queues[i].Push((void*)2ULL); -                } - -                return nullptr; -            } -        }; - +            char* Arg;  +  +            void* ThreadProc() override {  +                ui64 counters[NUMBER_OF_QUEUES];  +                for (size_t i = 0; i < NUMBER_OF_QUEUES; ++i) {  +                    counters[i] = 0;  +                }  +  +                for (int i = 0; i < 100000; ++i) {  +                    size_t rnd = GetCycleCount() % NUMBER_OF_QUEUES;  +                    int cookie = counters[rnd]++;  +                    Queues[rnd].Push(Arg + 
cookie);  +                }  +  +                for (size_t i = 0; i < NUMBER_OF_QUEUES; ++i) {  +                    Queues[i].Push((void*)2ULL);  +                }  +  +                return nullptr;  +            }  +        };  +           class TPopperThread: public ISimpleThread { -        public: +        public:               TPopperThread(TQueueType* theQueue, char* base)                  : Queue(theQueue) -                , Base(base) -            { -            } - +                , Base(base)  +            {  +            }  +               TQueueType* Queue; -            char* Base; - -            void* ThreadProc() override { -                ui64 counters[NUMBER_OF_PUSHERS]; -                for (size_t i = 0; i < NUMBER_OF_PUSHERS; ++i) { -                    counters[i] = 0; -                } - -                for (size_t fin = 0; fin < NUMBER_OF_PUSHERS;) { -                    auto msg = Queue->Pop(); -                    if (msg == nullptr) { -                        SpinLockPause(); -                        continue; -                    } -                    if (msg == (void*)2ULL) { -                        ++fin; -                        continue; -                    } -                    ui64 shift = (char*)msg - Base; -                    auto pusherNum = shift / 200000000ULL; -                    auto msgNum = shift % 200000000ULL; - -                    UNIT_ASSERT_EQUAL(counters[pusherNum], msgNum); -                    ++counters[pusherNum]; -                } - -                auto pmsg = Queue->Pop(); -                UNIT_ASSERT_VALUES_EQUAL(pmsg, nullptr); - -                return nullptr; -            } -        }; - +            char* Base;  +  +            void* ThreadProc() override {  +                ui64 counters[NUMBER_OF_PUSHERS];  +                for (size_t i = 0; i < NUMBER_OF_PUSHERS; ++i) {  +                    counters[i] = 0;  +                }  +  +                for (size_t fin = 0; fin < 
NUMBER_OF_PUSHERS;) {  +                    auto msg = Queue->Pop();  +                    if (msg == nullptr) {  +                        SpinLockPause();  +                        continue;  +                    }  +                    if (msg == (void*)2ULL) {  +                        ++fin;  +                        continue;  +                    }  +                    ui64 shift = (char*)msg - Base;  +                    auto pusherNum = shift / 200000000ULL;  +                    auto msgNum = shift % 200000000ULL;  +  +                    UNIT_ASSERT_EQUAL(counters[pusherNum], msgNum);  +                    ++counters[pusherNum];  +                }  +  +                auto pmsg = Queue->Pop();  +                UNIT_ASSERT_VALUES_EQUAL(pmsg, nullptr);  +  +                return nullptr;  +            }  +        };  +           TVector<TAutoPtr<TPopperThread>> poppers;          TVector<TAutoPtr<TPusherThread>> pushers; - -        for (size_t i = 0; i < NUMBER_OF_QUEUES; ++i) { -            poppers.emplace_back(new TPopperThread(&queue[i], (char*)&queue)); -            poppers.back()->Start(); -        } - -        for (size_t i = 0; i < NUMBER_OF_PUSHERS; ++i) { -            pushers.emplace_back( -                new TPusherThread(queue, (char*)&queue + 200000000ULL * i)); -            pushers.back()->Start(); -        } - -        for (size_t i = 0; i < NUMBER_OF_QUEUES; ++i) { -            poppers[i]->Join(); -        } - -        for (size_t i = 0; i < NUMBER_OF_PUSHERS; ++i) { -            pushers[i]->Join(); -        } -    } - -    void Threads8_RndPush100K_Threads8_Queues() { -        ManyRndPush100K_ManyQueues<8, 8>(); -    } - -    /* -    void Threads24_RndPush100K_Threads24_Queues() { -        ManyRndPush100K_ManyQueues<24, 24>(); -    } - -    void Threads24_RndPush100K_Threads8_Queues() { -        ManyRndPush100K_ManyQueues<24, 8>(); -    } - -    void Threads24_RndPush100K_Threads4_Queues() { -        ManyRndPush100K_ManyQueues<24, 4>(); 
-    } -    */ -}; - -REGISTER_TESTS_FOR_ALL_ORDERED_QUEUES(TQueueTestProcs); +  +        for (size_t i = 0; i < NUMBER_OF_QUEUES; ++i) {  +            poppers.emplace_back(new TPopperThread(&queue[i], (char*)&queue));  +            poppers.back()->Start();  +        }  +  +        for (size_t i = 0; i < NUMBER_OF_PUSHERS; ++i) {  +            pushers.emplace_back(  +                new TPusherThread(queue, (char*)&queue + 200000000ULL * i));  +            pushers.back()->Start();  +        }  +  +        for (size_t i = 0; i < NUMBER_OF_QUEUES; ++i) {  +            poppers[i]->Join();  +        }  +  +        for (size_t i = 0; i < NUMBER_OF_PUSHERS; ++i) {  +            pushers[i]->Join();  +        }  +    }  +  +    void Threads8_RndPush100K_Threads8_Queues() {  +        ManyRndPush100K_ManyQueues<8, 8>();  +    }  +  +    /*  +    void Threads24_RndPush100K_Threads24_Queues() {  +        ManyRndPush100K_ManyQueues<24, 24>();  +    }  +  +    void Threads24_RndPush100K_Threads8_Queues() {  +        ManyRndPush100K_ManyQueues<24, 8>();  +    }  +  +    void Threads24_RndPush100K_Threads4_Queues() {  +        ManyRndPush100K_ManyQueues<24, 4>();  +    }  +    */  +};  +  +REGISTER_TESTS_FOR_ALL_ORDERED_QUEUES(TQueueTestProcs);  diff --git a/library/cpp/threading/queue/tune.h b/library/cpp/threading/queue/tune.h index 50fc3dc17cd..43ad5efe3ef 100644 --- a/library/cpp/threading/queue/tune.h +++ b/library/cpp/threading/queue/tune.h @@ -1,101 +1,101 @@ -#pragma once - -/* -  Motivation: consider you have a template class with many parameters -  with default associations - -  template <typename A = TDefA, -            typename B = TDefB, -            typename C = TDefC, -            typename D = TDefD> -  class TExample { -  }; - -  consider you would like to provide easy to use interface to tune all -  these parameters in position independed manner, -  In that case TTune would be helpful for you. 
- -  How to use: -  First step: declare a struct with all default associations - -  struct TDefaultTune { -      using TStructA = TDefA; -      using TStructB = TDefB; -      using TStructC = TDefC; -      using TStructD = TDefD; -  }; - -  Second step: declare helper names visible to a user - -  DeclareTuneTypeParam(TTuneParamA, TStructA); -  DeclareTuneTypeParam(TTuneParamB, TStructB); -  DeclareTuneTypeParam(TTuneParamC, TStructC); -  DeclareTuneTypeParam(TTuneParamD, TStructD); - -  Third step: declare TExample this way: - -  template <typename...TParams> -  class TExample { -      using TMyParams = TTune<TDefaultTune, TParams...>; - -      using TActualA = TMyParams::TStructA; -      using TActualB = TMyParams::TStructB; -      ... -  }; - -  TTune<TDefaultTune, TParams...> is a struct with the default parameteres -  taken from TDefaultTune and overridden from "TParams...". - -  for example:  "TTune<TDefaultTune, TTuneParamC<TUserClass>>" -  will be virtually the same as: - -  struct TTunedClass { -      using TStructA = TDefA; -      using TStructB = TDefB; -      using TStructC = TUserClass; -      using TStructD = TDefD; -  }; - -  From now on you can tune your TExample in the following manner: - -  using TCustomClass = -      TExample <TTuneParamA<TUserStruct1>, TTuneParamD<TUserStruct2>>; - -  You can also tweak constant expressions in your TDefaultTune. 
-  Consider you have: - -  struct TDefaultTune { -      static constexpr ui32 MySize = 42; -  }; - -  declare an interface to modify the parameter this way: - -  DeclareTuneValueParam(TStructSize, ui32, MySize); - -  and tweak your class: - -  using TTwiceBigger = TExample<TStructSize<84>>; - - */ - -#define DeclareTuneTypeParam(TParamName, InternalName) \ -    template <typename TNewType>                       \ -    struct TParamName {                                \ -        template <typename TBase>                      \ -        struct TApply: public TBase {                  \ -            using InternalName = TNewType;             \ -        };                                             \ -    } - -#define DeclareTuneValueParam(TParamName, TValueType, InternalName) \ -    template <TValueType NewValue>                                  \ -    struct TParamName {                                             \ -        template <typename TBase>                                   \ -        struct TApply: public TBase {                               \ -            static constexpr TValueType InternalName = NewValue;    \ -        };                                                          \ -    } - +#pragma once  +  +/*  +  Motivation: consider you have a template class with many parameters  +  with default associations  +  +  template <typename A = TDefA,  +            typename B = TDefB,  +            typename C = TDefC,  +            typename D = TDefD>  +  class TExample {  +  };  +  +  consider you would like to provide easy to use interface to tune all  +  these parameters in position independed manner,  +  In that case TTune would be helpful for you.  
+  +  How to use:  +  First step: declare a struct with all default associations  +  +  struct TDefaultTune {  +      using TStructA = TDefA;  +      using TStructB = TDefB;  +      using TStructC = TDefC;  +      using TStructD = TDefD;  +  };  +  +  Second step: declare helper names visible to a user  +  +  DeclareTuneTypeParam(TTuneParamA, TStructA);  +  DeclareTuneTypeParam(TTuneParamB, TStructB);  +  DeclareTuneTypeParam(TTuneParamC, TStructC);  +  DeclareTuneTypeParam(TTuneParamD, TStructD);  +  +  Third step: declare TExample this way:  +  +  template <typename...TParams>  +  class TExample {  +      using TMyParams = TTune<TDefaultTune, TParams...>;  +  +      using TActualA = TMyParams::TStructA;  +      using TActualB = TMyParams::TStructB;  +      ...  +  };  +  +  TTune<TDefaultTune, TParams...> is a struct with the default parameteres  +  taken from TDefaultTune and overridden from "TParams...".  +  +  for example:  "TTune<TDefaultTune, TTuneParamC<TUserClass>>"  +  will be virtually the same as:  +  +  struct TTunedClass {  +      using TStructA = TDefA;  +      using TStructB = TDefB;  +      using TStructC = TUserClass;  +      using TStructD = TDefD;  +  };  +  +  From now on you can tune your TExample in the following manner:  +  +  using TCustomClass =  +      TExample <TTuneParamA<TUserStruct1>, TTuneParamD<TUserStruct2>>;  +  +  You can also tweak constant expressions in your TDefaultTune.  
+  Consider you have:  +  +  struct TDefaultTune {  +      static constexpr ui32 MySize = 42;  +  };  +  +  declare an interface to modify the parameter this way:  +  +  DeclareTuneValueParam(TStructSize, ui32, MySize);  +  +  and tweak your class:  +  +  using TTwiceBigger = TExample<TStructSize<84>>;  +  + */  +  +#define DeclareTuneTypeParam(TParamName, InternalName) \  +    template <typename TNewType>                       \  +    struct TParamName {                                \  +        template <typename TBase>                      \  +        struct TApply: public TBase {                  \  +            using InternalName = TNewType;             \  +        };                                             \  +    }  +  +#define DeclareTuneValueParam(TParamName, TValueType, InternalName) \  +    template <TValueType NewValue>                                  \  +    struct TParamName {                                             \  +        template <typename TBase>                                   \  +        struct TApply: public TBase {                               \  +            static constexpr TValueType InternalName = NewValue;    \  +        };                                                          \  +    }  +   #define DeclareTuneContainer(TParamName, InternalName)              \      template <template <typename, typename...> class TNewContainer> \      struct TParamName {                                             \ @@ -104,22 +104,22 @@              template <typename TElem, typename... TRest>            \              using InternalName = TNewContainer<TElem, TRest...>;    \          };                                                          \ -    } - -namespace NTunePrivate { -    template <typename TBase, typename... TParams> -    struct TFold; - -    template <typename TBase> -    struct TFold<TBase>: public TBase { -    }; - -    template <typename TBase, typename TFirstArg, typename... 
TRest> -    struct TFold<TBase, TFirstArg, TRest...> -       : public TFold<typename TFirstArg::template TApply<TBase>, TRest...> { -    }; -} - -template <typename TDefault, typename... TParams> -struct TTune: public NTunePrivate::TFold<TDefault, TParams...> { -}; +    }  +  +namespace NTunePrivate {  +    template <typename TBase, typename... TParams>  +    struct TFold;  +  +    template <typename TBase>  +    struct TFold<TBase>: public TBase {  +    };  +  +    template <typename TBase, typename TFirstArg, typename... TRest>  +    struct TFold<TBase, TFirstArg, TRest...>  +       : public TFold<typename TFirstArg::template TApply<TBase>, TRest...> {  +    };  +}  +  +template <typename TDefault, typename... TParams>  +struct TTune: public NTunePrivate::TFold<TDefault, TParams...> {  +};  diff --git a/library/cpp/threading/queue/tune_ut.cpp b/library/cpp/threading/queue/tune_ut.cpp index 7e980d3e27e..64bc8fd4279 100644 --- a/library/cpp/threading/queue/tune_ut.cpp +++ b/library/cpp/threading/queue/tune_ut.cpp @@ -1,118 +1,118 @@  #include <library/cpp/testing/unittest/registar.h> -#include "tune.h" - -struct TDefaultStructA { -}; - -struct TDefaultStructB { -}; - -struct TDefaults { -    using TStructA = TDefaultStructA; -    using TStructB = TDefaultStructB; -    static constexpr ui32 Param1 = 42; -    static constexpr ui32 Param2 = 42; -}; - -DeclareTuneTypeParam(TweakStructA, TStructA); -DeclareTuneTypeParam(TweakStructB, TStructB); -DeclareTuneValueParam(TweakParam1, ui32, Param1); -DeclareTuneValueParam(TweakParam2, ui32, Param2); - +#include "tune.h"  +  +struct TDefaultStructA {  +};  +  +struct TDefaultStructB {  +};  +  +struct TDefaults {  +    using TStructA = TDefaultStructA;  +    using TStructB = TDefaultStructB;  +    static constexpr ui32 Param1 = 42;  +    static constexpr ui32 Param2 = 42;  +};  +  +DeclareTuneTypeParam(TweakStructA, TStructA);  +DeclareTuneTypeParam(TweakStructB, TStructB);  +DeclareTuneValueParam(TweakParam1, ui32, Param1);  
+DeclareTuneValueParam(TweakParam2, ui32, Param2);  +   Y_UNIT_TEST_SUITE(TestTuning) {      Y_UNIT_TEST(Defaults) { -        using TTuned = TTune<TDefaults>; -        using TunedA = TTuned::TStructA; -        using TunedB = TTuned::TStructB; -        auto sameA = std::is_same<TDefaultStructA, TunedA>::value; -        auto sameB = std::is_same<TDefaultStructB, TunedB>::value; -        auto param1 = TTuned::Param1; -        auto param2 = TTuned::Param2; - -        UNIT_ASSERT(sameA); -        UNIT_ASSERT(sameB); -        UNIT_ASSERT_EQUAL(param1, 42); -        UNIT_ASSERT_EQUAL(param2, 42); -    } - +        using TTuned = TTune<TDefaults>;  +        using TunedA = TTuned::TStructA;  +        using TunedB = TTuned::TStructB;  +        auto sameA = std::is_same<TDefaultStructA, TunedA>::value;  +        auto sameB = std::is_same<TDefaultStructB, TunedB>::value;  +        auto param1 = TTuned::Param1;  +        auto param2 = TTuned::Param2;  +  +        UNIT_ASSERT(sameA);  +        UNIT_ASSERT(sameB);  +        UNIT_ASSERT_EQUAL(param1, 42);  +        UNIT_ASSERT_EQUAL(param2, 42);  +    }  +       Y_UNIT_TEST(TuneStructA) { -        struct TMyStruct { -        }; - -        using TTuned = TTune<TDefaults, TweakStructA<TMyStruct>>; - -        using TunedA = TTuned::TStructA; -        using TunedB = TTuned::TStructB; -        //auto sameA = std::is_same<TDefaultStructA, TunedA>::value; -        auto sameB = std::is_same<TDefaultStructB, TunedB>::value; -        auto param1 = TTuned::Param1; -        auto param2 = TTuned::Param2; - -        auto sameA = std::is_same<TMyStruct, TunedA>::value; - -        UNIT_ASSERT(sameA); -        UNIT_ASSERT(sameB); -        UNIT_ASSERT_EQUAL(param1, 42); -        UNIT_ASSERT_EQUAL(param2, 42); -    } - +        struct TMyStruct {  +        };  +  +        using TTuned = TTune<TDefaults, TweakStructA<TMyStruct>>;  +  +        using TunedA = TTuned::TStructA;  +        using TunedB = TTuned::TStructB;  +        //auto sameA = 
std::is_same<TDefaultStructA, TunedA>::value;  +        auto sameB = std::is_same<TDefaultStructB, TunedB>::value;  +        auto param1 = TTuned::Param1;  +        auto param2 = TTuned::Param2;  +  +        auto sameA = std::is_same<TMyStruct, TunedA>::value;  +  +        UNIT_ASSERT(sameA);  +        UNIT_ASSERT(sameB);  +        UNIT_ASSERT_EQUAL(param1, 42);  +        UNIT_ASSERT_EQUAL(param2, 42);  +    }  +       Y_UNIT_TEST(TuneParam1) { -        using TTuned = TTune<TDefaults, TweakParam1<24>>; - -        using TunedA = TTuned::TStructA; -        using TunedB = TTuned::TStructB; -        auto sameA = std::is_same<TDefaultStructA, TunedA>::value; -        auto sameB = std::is_same<TDefaultStructB, TunedB>::value; -        auto param1 = TTuned::Param1; -        auto param2 = TTuned::Param2; - -        UNIT_ASSERT(sameA); -        UNIT_ASSERT(sameB); -        UNIT_ASSERT_EQUAL(param1, 24); -        UNIT_ASSERT_EQUAL(param2, 42); -    } - +        using TTuned = TTune<TDefaults, TweakParam1<24>>;  +  +        using TunedA = TTuned::TStructA;  +        using TunedB = TTuned::TStructB;  +        auto sameA = std::is_same<TDefaultStructA, TunedA>::value;  +        auto sameB = std::is_same<TDefaultStructB, TunedB>::value;  +        auto param1 = TTuned::Param1;  +        auto param2 = TTuned::Param2;  +  +        UNIT_ASSERT(sameA);  +        UNIT_ASSERT(sameB);  +        UNIT_ASSERT_EQUAL(param1, 24);  +        UNIT_ASSERT_EQUAL(param2, 42);  +    }  +       Y_UNIT_TEST(TuneStructAAndParam1) { -        struct TMyStruct { -        }; - -        using TTuned = -            TTune<TDefaults, TweakStructA<TMyStruct>, TweakParam1<24>>; - -        using TunedA = TTuned::TStructA; -        using TunedB = TTuned::TStructB; -        //auto sameA = std::is_same<TDefaultStructA, TunedA>::value; -        auto sameB = std::is_same<TDefaultStructB, TunedB>::value; -        auto param1 = TTuned::Param1; -        auto param2 = TTuned::Param2; - -        auto sameA = 
std::is_same<TMyStruct, TunedA>::value; - -        UNIT_ASSERT(sameA); -        UNIT_ASSERT(sameB); -        UNIT_ASSERT_EQUAL(param1, 24); -        UNIT_ASSERT_EQUAL(param2, 42); -    } - +        struct TMyStruct {  +        };  +  +        using TTuned =  +            TTune<TDefaults, TweakStructA<TMyStruct>, TweakParam1<24>>;  +  +        using TunedA = TTuned::TStructA;  +        using TunedB = TTuned::TStructB;  +        //auto sameA = std::is_same<TDefaultStructA, TunedA>::value;  +        auto sameB = std::is_same<TDefaultStructB, TunedB>::value;  +        auto param1 = TTuned::Param1;  +        auto param2 = TTuned::Param2;  +  +        auto sameA = std::is_same<TMyStruct, TunedA>::value;  +  +        UNIT_ASSERT(sameA);  +        UNIT_ASSERT(sameB);  +        UNIT_ASSERT_EQUAL(param1, 24);  +        UNIT_ASSERT_EQUAL(param2, 42);  +    }  +       Y_UNIT_TEST(TuneParam1AndStructA) { -        struct TMyStruct { -        }; - -        using TTuned = -            TTune<TDefaults, TweakParam1<24>, TweakStructA<TMyStruct>>; - -        using TunedA = TTuned::TStructA; -        using TunedB = TTuned::TStructB; -        //auto sameA = std::is_same<TDefaultStructA, TunedA>::value; -        auto sameB = std::is_same<TDefaultStructB, TunedB>::value; -        auto param1 = TTuned::Param1; -        auto param2 = TTuned::Param2; - -        auto sameA = std::is_same<TMyStruct, TunedA>::value; - -        UNIT_ASSERT(sameA); -        UNIT_ASSERT(sameB); -        UNIT_ASSERT_EQUAL(param1, 24); -        UNIT_ASSERT_EQUAL(param2, 42); -    } -} +        struct TMyStruct {  +        };  +  +        using TTuned =  +            TTune<TDefaults, TweakParam1<24>, TweakStructA<TMyStruct>>;  +  +        using TunedA = TTuned::TStructA;  +        using TunedB = TTuned::TStructB;  +        //auto sameA = std::is_same<TDefaultStructA, TunedA>::value;  +        auto sameB = std::is_same<TDefaultStructB, TunedB>::value;  +        auto param1 = TTuned::Param1;  +        auto param2 = 
TTuned::Param2;  +  +        auto sameA = std::is_same<TMyStruct, TunedA>::value;  +  +        UNIT_ASSERT(sameA);  +        UNIT_ASSERT(sameB);  +        UNIT_ASSERT_EQUAL(param1, 24);  +        UNIT_ASSERT_EQUAL(param2, 42);  +    }  +}  diff --git a/library/cpp/threading/queue/unordered_ut.cpp b/library/cpp/threading/queue/unordered_ut.cpp index a43b7f520e5..2018538bf77 100644 --- a/library/cpp/threading/queue/unordered_ut.cpp +++ b/library/cpp/threading/queue/unordered_ut.cpp @@ -1,154 +1,154 @@  #include <library/cpp/testing/unittest/registar.h> -#include <util/system/thread.h> -#include <algorithm> -#include <util/generic/vector.h> -#include <util/random/fast.h> - -#include "ut_helpers.h" - +#include <util/system/thread.h>  +#include <algorithm>  +#include <util/generic/vector.h>  +#include <util/random/fast.h>  +  +#include "ut_helpers.h"  +   template <typename TQueueType> -class TTestUnorderedQueue: public TTestBase { -private: -    using TLink = TIntrusiveLink; - +class TTestUnorderedQueue: public TTestBase {  +private:  +    using TLink = TIntrusiveLink;  +       UNIT_TEST_SUITE_DEMANGLE(TTestUnorderedQueue<TQueueType>); -    UNIT_TEST(Push1M_Pop1M_Unordered) -    UNIT_TEST_SUITE_END(); - -public: -    void Push1M_Pop1M_Unordered() { -        constexpr int REPEAT = 1000000; +    UNIT_TEST(Push1M_Pop1M_Unordered)  +    UNIT_TEST_SUITE_END();  +  +public:  +    void Push1M_Pop1M_Unordered() {  +        constexpr int REPEAT = 1000000;           TQueueType queue; -        TLink msg[REPEAT]; - -        auto pmsg = queue.Pop(); -        UNIT_ASSERT_VALUES_EQUAL(pmsg, nullptr); - -        for (int i = 0; i < REPEAT; ++i) { -            queue.Push(&msg[i]); -        } - +        TLink msg[REPEAT];  +  +        auto pmsg = queue.Pop();  +        UNIT_ASSERT_VALUES_EQUAL(pmsg, nullptr);  +  +        for (int i = 0; i < REPEAT; ++i) {  +            queue.Push(&msg[i]);  +        }  +           TVector<TLink*> popped; -        popped.reserve(REPEAT); -        for 
(int i = 0; i < REPEAT; ++i) { -            popped.push_back((TLink*)queue.Pop()); -        } - -        pmsg = queue.Pop(); -        UNIT_ASSERT_VALUES_EQUAL(pmsg, nullptr); - -        std::sort(popped.begin(), popped.end()); -        for (int i = 0; i < REPEAT; ++i) { -            UNIT_ASSERT_VALUES_EQUAL(&msg[i], popped[i]); -        } -    } -}; - +        popped.reserve(REPEAT);  +        for (int i = 0; i < REPEAT; ++i) {  +            popped.push_back((TLink*)queue.Pop());  +        }  +  +        pmsg = queue.Pop();  +        UNIT_ASSERT_VALUES_EQUAL(pmsg, nullptr);  +  +        std::sort(popped.begin(), popped.end());  +        for (int i = 0; i < REPEAT; ++i) {  +            UNIT_ASSERT_VALUES_EQUAL(&msg[i], popped[i]);  +        }  +    }  +};  +   template <typename TQueueType> -class TTestWeakQueue: public TTestBase { -private: +class TTestWeakQueue: public TTestBase {  +private:       UNIT_TEST_SUITE_DEMANGLE(TTestWeakQueue<TQueueType>); -    UNIT_TEST(Threads8_Rnd_Exchange) -    UNIT_TEST_SUITE_END(); - -public: -    template <ui16 COUNT = 48, ui32 MSG_COUNT = 10000> -    void ManyThreadsRndExchange() { +    UNIT_TEST(Threads8_Rnd_Exchange)  +    UNIT_TEST_SUITE_END();  +  +public:  +    template <ui16 COUNT = 48, ui32 MSG_COUNT = 10000>  +    void ManyThreadsRndExchange() {           TQueueType queues[COUNT]; - +           class TWorker: public ISimpleThread { -        public: -            TWorker( +        public:  +            TWorker(                   TQueueType* queues_,                  ui16 mine,                  TAtomic* pushDone) -                : Queues(queues_) -                , MineQueue(mine) -                , PushDone(pushDone) -            { -            } - +                : Queues(queues_)  +                , MineQueue(mine)  +                , PushDone(pushDone)  +            {  +            }  +               TQueueType* Queues; -            ui16 MineQueue; +            ui16 MineQueue;               TVector<uintptr_t> 
Received; -            TAtomic* PushDone; - -            void* ThreadProc() override { -                TReallyFastRng32 rng(GetCycleCount()); -                Received.reserve(MSG_COUNT * 2); - -                for (ui32 loop = 1; loop <= MSG_COUNT; ++loop) { -                    for (;;) { -                        auto msg = Queues[MineQueue].Pop(); -                        if (msg == nullptr) { -                            break; -                        } - -                        Received.push_back((uintptr_t)msg); -                    } - -                    ui16 rnd = rng.GenRand64() % COUNT; -                    ui64 msg = ((ui64)MineQueue << 32) + loop; -                    while (!Queues[rnd].Push((void*)msg)) { -                    } -                } - -                AtomicIncrement(*PushDone); - -                for (;;) { -                    bool isItLast = AtomicGet(*PushDone) == COUNT; -                    auto msg = Queues[MineQueue].Pop(); -                    if (msg != nullptr) { -                        Received.push_back((uintptr_t)msg); -                    } else { -                        if (isItLast) { -                            break; -                        } -                        SpinLockPause(); -                    } -                } - -                for (ui64 last = 0;;) { -                    auto msg = Queues[MineQueue].UnsafeScanningPop(&last); -                    if (msg == nullptr) { -                        break; -                    } -                    Received.push_back((uintptr_t)msg); -                } - -                return nullptr; -            } -        }; - +            TAtomic* PushDone;  +  +            void* ThreadProc() override {  +                TReallyFastRng32 rng(GetCycleCount());  +                Received.reserve(MSG_COUNT * 2);  +  +                for (ui32 loop = 1; loop <= MSG_COUNT; ++loop) {  +                    for (;;) {  +                        auto msg = 
Queues[MineQueue].Pop();  +                        if (msg == nullptr) {  +                            break;  +                        }  +  +                        Received.push_back((uintptr_t)msg);  +                    }  +  +                    ui16 rnd = rng.GenRand64() % COUNT;  +                    ui64 msg = ((ui64)MineQueue << 32) + loop;  +                    while (!Queues[rnd].Push((void*)msg)) {  +                    }  +                }  +  +                AtomicIncrement(*PushDone);  +  +                for (;;) {  +                    bool isItLast = AtomicGet(*PushDone) == COUNT;  +                    auto msg = Queues[MineQueue].Pop();  +                    if (msg != nullptr) {  +                        Received.push_back((uintptr_t)msg);  +                    } else {  +                        if (isItLast) {  +                            break;  +                        }  +                        SpinLockPause();  +                    }  +                }  +  +                for (ui64 last = 0;;) {  +                    auto msg = Queues[MineQueue].UnsafeScanningPop(&last);  +                    if (msg == nullptr) {  +                        break;  +                    }  +                    Received.push_back((uintptr_t)msg);  +                }  +  +                return nullptr;  +            }  +        };  +           TVector<TAutoPtr<TWorker>> workers; -        TAtomic pushDone = 0; - -        for (ui32 i = 0; i < COUNT; ++i) { -            workers.emplace_back(new TWorker(&queues[0], i, &pushDone)); -            workers.back()->Start(); -        } - +        TAtomic pushDone = 0;  +  +        for (ui32 i = 0; i < COUNT; ++i) {  +            workers.emplace_back(new TWorker(&queues[0], i, &pushDone));  +            workers.back()->Start();  +        }  +           TVector<uintptr_t> all; -        for (ui32 i = 0; i < COUNT; ++i) { -            workers[i]->Join(); -            all.insert(all.begin(), +        for (ui32 i = 0; i 
< COUNT; ++i) {  +            workers[i]->Join();  +            all.insert(all.begin(),                          workers[i]->Received.begin(), workers[i]->Received.end()); -        } - -        std::sort(all.begin(), all.end()); -        auto iter = all.begin(); -        for (ui32 i = 0; i < COUNT; ++i) { -            for (ui32 k = 1; k <= MSG_COUNT; ++k) { -                UNIT_ASSERT_VALUES_EQUAL(((ui64)i << 32) + k, *iter); -                ++iter; -            } -        } -    } - -    void Threads8_Rnd_Exchange() { -        ManyThreadsRndExchange<8>(); -    } -}; - -REGISTER_TESTS_FOR_ALL_UNORDERED_QUEUES(TTestUnorderedQueue); -UNIT_TEST_SUITE_REGISTRATION(TTestWeakQueue<TMPMCUnorderedRing>); +        }  +  +        std::sort(all.begin(), all.end());  +        auto iter = all.begin();  +        for (ui32 i = 0; i < COUNT; ++i) {  +            for (ui32 k = 1; k <= MSG_COUNT; ++k) {  +                UNIT_ASSERT_VALUES_EQUAL(((ui64)i << 32) + k, *iter);  +                ++iter;  +            }  +        }  +    }  +  +    void Threads8_Rnd_Exchange() {  +        ManyThreadsRndExchange<8>();  +    }  +};  +  +REGISTER_TESTS_FOR_ALL_UNORDERED_QUEUES(TTestUnorderedQueue);  +UNIT_TEST_SUITE_REGISTRATION(TTestWeakQueue<TMPMCUnorderedRing>);  diff --git a/library/cpp/threading/queue/ut/ya.make b/library/cpp/threading/queue/ut/ya.make index 8883d9bf693..dda204155eb 100644 --- a/library/cpp/threading/queue/ut/ya.make +++ b/library/cpp/threading/queue/ut/ya.make @@ -1,16 +1,16 @@  UNITTEST_FOR(library/cpp/threading/queue) - +   OWNER(agri) - -ALLOCATOR(B) - -SRCS( -    basic_ut.cpp -    queue_ut.cpp -    tune_ut.cpp -    unordered_ut.cpp -    ut_helpers.cpp -    ut_helpers.h -) - -END() +  +ALLOCATOR(B)  +  +SRCS(  +    basic_ut.cpp  +    queue_ut.cpp  +    tune_ut.cpp  +    unordered_ut.cpp  +    ut_helpers.cpp  +    ut_helpers.h  +)  +  +END()  diff --git a/library/cpp/threading/queue/ut_helpers.cpp b/library/cpp/threading/queue/ut_helpers.cpp index 
aa3a8314411..342aa125a0b 100644 --- a/library/cpp/threading/queue/ut_helpers.cpp +++ b/library/cpp/threading/queue/ut_helpers.cpp @@ -1 +1 @@ -#include "ut_helpers.h" +#include "ut_helpers.h"  diff --git a/library/cpp/threading/queue/ut_helpers.h b/library/cpp/threading/queue/ut_helpers.h index 2756b52601e..c7203665930 100644 --- a/library/cpp/threading/queue/ut_helpers.h +++ b/library/cpp/threading/queue/ut_helpers.h @@ -1,40 +1,40 @@ -#pragma once - -#include "mpsc_read_as_filled.h" -#include "mpsc_htswap.h" -#include "mpsc_vinfarr_obstructive.h" -#include "mpsc_intrusive_unordered.h" -#include "mpmc_unordered_ring.h" - -struct TBasicHTSwap: public NThreading::THTSwapQueue<> { -}; - -struct TBasicReadAsFilled: public NThreading::TReadAsFilledQueue<> { -}; - -struct TBasicObstructiveConsumer +#pragma once  +  +#include "mpsc_read_as_filled.h"  +#include "mpsc_htswap.h"  +#include "mpsc_vinfarr_obstructive.h"  +#include "mpsc_intrusive_unordered.h"  +#include "mpmc_unordered_ring.h"  +  +struct TBasicHTSwap: public NThreading::THTSwapQueue<> {  +};  +  +struct TBasicReadAsFilled: public NThreading::TReadAsFilledQueue<> {  +};  +  +struct TBasicObstructiveConsumer      : public NThreading::TObstructiveConsumerQueue<> { -}; - -struct TBasicMPSCIntrusiveUnordered +};  +  +struct TBasicMPSCIntrusiveUnordered      : public NThreading::TMPSCIntrusiveUnordered { -}; - -struct TIntrusiveLink: public NThreading::TIntrusiveNode { -}; - -struct TMPMCUnorderedRing: public NThreading::TMPMCUnorderedRing { -    TMPMCUnorderedRing() -        : NThreading::TMPMCUnorderedRing(10000000) -    { -    } -}; - +};  +  +struct TIntrusiveLink: public NThreading::TIntrusiveNode {  +};  +  +struct TMPMCUnorderedRing: public NThreading::TMPMCUnorderedRing {  +    TMPMCUnorderedRing()  +        : NThreading::TMPMCUnorderedRing(10000000)  +    {  +    }  +};  +   #define REGISTER_TESTS_FOR_ALL_ORDERED_QUEUES(TestTemplate)         \      UNIT_TEST_SUITE_REGISTRATION(TestTemplate<TBasicHTSwap>); 
      \      UNIT_TEST_SUITE_REGISTRATION(TestTemplate<TBasicReadAsFilled>); \ -    UNIT_TEST_SUITE_REGISTRATION(TestTemplate<TBasicObstructiveConsumer>) - +    UNIT_TEST_SUITE_REGISTRATION(TestTemplate<TBasicObstructiveConsumer>)  +   #define REGISTER_TESTS_FOR_ALL_UNORDERED_QUEUES(TestTemplate)                 \ -    UNIT_TEST_SUITE_REGISTRATION(TestTemplate<TBasicMPSCIntrusiveUnordered>); \ -    UNIT_TEST_SUITE_REGISTRATION(TestTemplate<TMPMCUnorderedRing>); +    UNIT_TEST_SUITE_REGISTRATION(TestTemplate<TBasicMPSCIntrusiveUnordered>); \  +    UNIT_TEST_SUITE_REGISTRATION(TestTemplate<TMPMCUnorderedRing>);  diff --git a/library/cpp/threading/queue/ya.make b/library/cpp/threading/queue/ya.make index 6570b38ce59..3a11eb2d925 100644 --- a/library/cpp/threading/queue/ya.make +++ b/library/cpp/threading/queue/ya.make @@ -1,18 +1,18 @@ -LIBRARY() - -OWNER(agri) - -SRCS( -    mpmc_unordered_ring.cpp -    mpmc_unordered_ring.h -    mpsc_htswap.cpp -    mpsc_htswap.h -    mpsc_intrusive_unordered.cpp -    mpsc_intrusive_unordered.h -    mpsc_read_as_filled.cpp -    mpsc_read_as_filled.h -    mpsc_vinfarr_obstructive.cpp -    mpsc_vinfarr_obstructive.h -) - -END() +LIBRARY()  +  +OWNER(agri)  +  +SRCS(  +    mpmc_unordered_ring.cpp  +    mpmc_unordered_ring.h  +    mpsc_htswap.cpp  +    mpsc_htswap.h  +    mpsc_intrusive_unordered.cpp  +    mpsc_intrusive_unordered.h  +    mpsc_read_as_filled.cpp  +    mpsc_read_as_filled.h  +    mpsc_vinfarr_obstructive.cpp  +    mpsc_vinfarr_obstructive.h  +)  +  +END()  | 
