summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author fixthgame <[email protected]> 2023-10-26 12:53:26 +0300
committer fixthgame <[email protected]> 2023-10-26 13:24:11 +0300
commit ff9e99596eacad8108b1b5045c7e92de08453be0 (patch)
tree d0198469d7d886d9784a91cb01634b0105bf3538
parent 0920aa55db3cd7885f34a2937f005ea41fd27600 (diff)
Blend16 + BlendVar funcs for simd
Blend + Log Store test
-rw-r--r-- .mapping.json | 6
-rw-r--r-- ydb/library/yql/utils/simd/CMakeLists.txt | 1
-rw-r--r-- ydb/library/yql/utils/simd/exec/CMakeLists.txt | 9
-rw-r--r-- ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.darwin-x86_64.txt | 27
-rw-r--r-- ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.linux-aarch64.txt | 32
-rw-r--r-- ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.linux-x86_64.txt | 34
-rw-r--r-- ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.txt | 17
-rw-r--r-- ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.windows-x86_64.txt | 22
-rw-r--r-- ydb/library/yql/utils/simd/exec/stream_store/main.cpp | 108
-rw-r--r-- ydb/library/yql/utils/simd/exec/stream_store/ya.make | 5
-rw-r--r-- ydb/library/yql/utils/simd/exec/ya.make | 1
-rw-r--r-- ydb/library/yql/utils/simd/simd.h | 7
-rw-r--r-- ydb/library/yql/utils/simd/simd_avx2.h | 56
-rw-r--r-- ydb/library/yql/utils/simd/simd_fallback.h | 73
-rw-r--r-- ydb/library/yql/utils/simd/simd_sse42.h | 67
-rw-r--r-- ydb/library/yql/utils/simd/simd_ut.cpp | 16
-rw-r--r-- ydb/library/yql/utils/simd/ya.make | 16
17 files changed, 484 insertions, 13 deletions
diff --git a/.mapping.json b/.mapping.json
index 2bd8ca82b5e..d29890b1993 100644
--- a/.mapping.json
+++ b/.mapping.json
@@ -8736,6 +8736,12 @@
"ydb/library/yql/utils/log/ut/CMakeLists.txt":"",
"ydb/library/yql/utils/log/ut/CMakeLists.windows-x86_64.txt":"",
"ydb/library/yql/utils/simd/CMakeLists.txt":"",
+ "ydb/library/yql/utils/simd/exec/CMakeLists.txt":"",
+ "ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.darwin-x86_64.txt":"",
+ "ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.linux-aarch64.txt":"",
+ "ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.linux-x86_64.txt":"",
+ "ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.txt":"",
+ "ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.windows-x86_64.txt":"",
"ydb/library/yql/utils/simd/ut/CMakeLists.darwin-x86_64.txt":"",
"ydb/library/yql/utils/simd/ut/CMakeLists.linux-aarch64.txt":"",
"ydb/library/yql/utils/simd/ut/CMakeLists.linux-x86_64.txt":"",
diff --git a/ydb/library/yql/utils/simd/CMakeLists.txt b/ydb/library/yql/utils/simd/CMakeLists.txt
index 1703b0a27bf..4118c479b56 100644
--- a/ydb/library/yql/utils/simd/CMakeLists.txt
+++ b/ydb/library/yql/utils/simd/CMakeLists.txt
@@ -6,4 +6,5 @@
# original buildsystem will not be accepted.
+add_subdirectory(exec)
add_subdirectory(ut)
diff --git a/ydb/library/yql/utils/simd/exec/CMakeLists.txt b/ydb/library/yql/utils/simd/exec/CMakeLists.txt
new file mode 100644
index 00000000000..1949327f1ea
--- /dev/null
+++ b/ydb/library/yql/utils/simd/exec/CMakeLists.txt
@@ -0,0 +1,9 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(stream_store)
diff --git a/ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 00000000000..a99ac27d6f9
--- /dev/null
+++ b/ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,27 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_executable(stream_store)
+target_link_libraries(stream_store PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-cpuid_check
+)
+target_link_options(stream_store PRIVATE
+ -Wl,-platform_version,macos,11.0,11.0
+ -fPIC
+ -fPIC
+)
+target_sources(stream_store PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/utils/simd/exec/stream_store/main.cpp
+)
+target_allocator(stream_store
+ system_allocator
+)
+vcs_info(stream_store)
diff --git a/ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.linux-aarch64.txt b/ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.linux-aarch64.txt
new file mode 100644
index 00000000000..049d3baab76
--- /dev/null
+++ b/ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,32 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_executable(stream_store)
+target_link_libraries(stream_store PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+)
+target_link_options(stream_store PRIVATE
+ -ldl
+ -lrt
+ -Wl,--no-as-needed
+ -fPIC
+ -fPIC
+ -lpthread
+ -lrt
+ -ldl
+)
+target_sources(stream_store PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/utils/simd/exec/stream_store/main.cpp
+)
+target_allocator(stream_store
+ cpp-malloc-jemalloc
+)
+vcs_info(stream_store)
diff --git a/ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.linux-x86_64.txt b/ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.linux-x86_64.txt
new file mode 100644
index 00000000000..cbf6c43e02e
--- /dev/null
+++ b/ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,34 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_executable(stream_store)
+target_link_libraries(stream_store PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-cpuid_check
+)
+target_link_options(stream_store PRIVATE
+ -ldl
+ -lrt
+ -Wl,--no-as-needed
+ -fPIC
+ -fPIC
+ -lpthread
+ -lrt
+ -ldl
+)
+target_sources(stream_store PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/utils/simd/exec/stream_store/main.cpp
+)
+target_allocator(stream_store
+ cpp-malloc-tcmalloc
+ libs-tcmalloc-no_percpu_cache
+)
+vcs_info(stream_store)
diff --git a/ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.txt b/ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.txt
new file mode 100644
index 00000000000..f8b31df0c11
--- /dev/null
+++ b/ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.windows-x86_64.txt b/ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.windows-x86_64.txt
new file mode 100644
index 00000000000..a00f4d06e7f
--- /dev/null
+++ b/ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,22 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_executable(stream_store)
+target_link_libraries(stream_store PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-cpuid_check
+)
+target_sources(stream_store PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/utils/simd/exec/stream_store/main.cpp
+)
+target_allocator(stream_store
+ system_allocator
+)
+vcs_info(stream_store)
diff --git a/ydb/library/yql/utils/simd/exec/stream_store/main.cpp b/ydb/library/yql/utils/simd/exec/stream_store/main.cpp
new file mode 100644
index 00000000000..926f47e0d9e
--- /dev/null
+++ b/ydb/library/yql/utils/simd/exec/stream_store/main.cpp
@@ -0,0 +1,108 @@
+
+#include <util/generic/ptr.h>
+#include <util/system/cpu_id.h>
+#include <util/system/types.h>
+
+#include <ydb/library/yql/utils/simd/simd.h>
+
+/**
+ * Factory object that main() passes to NSimd::SelectSimdTraits().
+ * Create<TTraits>() builds a worker specialised for one SIMD traits type and
+ * returns it behind the type-erased TWrapWorker interface.
+ */
+struct TPerfomancer {
+    TPerfomancer() = default;
+
+    // Type-erased interface so the caller can drive whichever TWorker was created.
+    struct TWrapWorker {
+        virtual int StreamLoad(bool log) = 0;
+        virtual ~TWrapWorker() = default;
+    };
+
+    template<typename TTraits>
+    struct TWorker : TWrapWorker {
+        template<typename T>
+        using TSimd = typename TTraits::template TSimd8<T>;
+        TWorker() = default;
+
+        // Prints the name of the active implementation, keyed on the register size in bytes.
+        void Info() {
+            if (TTraits::Size == 8) {
+                Cerr << "Fallback implementation:" << Endl;
+            } else if (TTraits::Size == 16) {
+                Cerr << "SSE42 implementation:" << Endl;
+            } else if (TTraits::Size == 32) {
+                Cerr << "AVX2 implementation:" << Endl;
+            }
+        }
+
+        /**
+         * Fills a 4 MiB buffer with the repeating i64 pattern 0,1,2,3 via
+         * StoreStream(), reads the buffer back to verify it, and optionally
+         * reports the elapsed time. The measured interval covers both the
+         * store loop and the verification loop.
+         *
+         * @param log  when true, prints implementation name, time and throughput to Cerr.
+         * @return 1 if every element read back matches the pattern, 0 otherwise.
+         */
+        int StreamLoad(bool log = true) override {
+            // Registers needed to cover one 32-byte pattern, and i64 values per register.
+            const size_t batch = 32 / TTraits::Size;
+            const size_t batch_size = TTraits::Size / 8;
+            // log2(batch_size): maps an i64 index inside the pattern to its register slot.
+            size_t log_batch_size = 0;
+            if (TTraits::Size == 8) {
+                log_batch_size = 0;
+            } else if (TTraits::Size == 16) {
+                log_batch_size = 1;
+            } else {
+                log_batch_size = 2;
+            }
+
+            const size_t size = (32LL << 17);
+            // 4 MiB does not fit safely on the stack (the default Windows stack is
+            // 1 MiB), so the buffer has static storage duration instead.
+            // Consequence: concurrent calls for the same TTraits would share it.
+            static i64 buf[size / 8] __attribute__((aligned(32)));
+            i64 tmp[4];
+            for (size_t i = 0; i < 4; i += 1) {
+                tmp[i] = static_cast<i64>(i);
+            }
+            TSimd<i8> tmpSimd[batch];
+            for (size_t i = 0; i < 4; i += batch_size) {
+                tmpSimd[i >> log_batch_size] = TSimd<i8>(reinterpret_cast<i8*>(tmp + i));
+            }
+
+            std::chrono::steady_clock::time_point begin01 =
+                std::chrono::steady_clock::now();
+
+            const size_t size_loop = size / 8;
+
+            // Each outer step writes one full 32-byte pattern (four i64 values).
+            for (size_t i = 0; i < size_loop; i += 4) {
+                for (size_t j = 0; j < batch; j += 1) {
+                    tmpSimd[j].StoreStream(reinterpret_cast<i8*>(buf + i + j * batch_size));
+                }
+            }
+
+            bool is_ok = true;
+
+            for (size_t i = 0; i < size_loop; i += 1) {
+                if (buf[i] != static_cast<i64>(i % 4)) {
+                    is_ok = false;
+                }
+            }
+
+            std::chrono::steady_clock::time_point end01 =
+                std::chrono::steady_clock::now();
+
+            ui64 microseconds =
+                std::chrono::duration_cast<std::chrono::microseconds>(end01 - begin01)
+                    .count();
+            if (log) {
+                Info();
+                Cerr << "Time for stream load = " << microseconds << "[microseconds]"
+                     << Endl;
+                Cerr << "Data size = " << (size / (1024 * 1024))
+                     << " [MB]" << Endl;
+                // +1 keeps the divisor non-zero when the run takes under a microsecond.
+                Cerr << "Stream load/save/accum speed = "
+                     << (size * 1000 * 1000) /
+                            (1024 * 1024 * (microseconds + 1))
+                     << " MB/sec" << Endl;
+                Cerr << Endl;
+            }
+            return is_ok;
+        }
+
+        ~TWorker() = default;
+    };
+
+    // Builds a worker for the given traits type and returns it as the base interface.
+    template<typename TTraits>
+    THolder<TWrapWorker> Create() const {
+        return MakeHolder<TWorker<TTraits>>();
+    }
+};
+
+// Runs the stream-store check once, without logging, on the traits chosen by
+// NSimd::SelectSimdTraits(). The process exit code is 0 when the data verified.
+int main() {
+    TPerfomancer factory;
+    auto selectedWorker = NSimd::SelectSimdTraits(factory);
+    const bool verified = selectedWorker->StreamLoad(false);
+    return verified ? 0 : 1;
+} \ No newline at end of file
diff --git a/ydb/library/yql/utils/simd/exec/stream_store/ya.make b/ydb/library/yql/utils/simd/exec/stream_store/ya.make
new file mode 100644
index 00000000000..631001cbdc7
--- /dev/null
+++ b/ydb/library/yql/utils/simd/exec/stream_store/ya.make
@@ -0,0 +1,5 @@
+PROGRAM()
+
+SRCS(main.cpp)
+
+END() \ No newline at end of file
diff --git a/ydb/library/yql/utils/simd/exec/ya.make b/ydb/library/yql/utils/simd/exec/ya.make
new file mode 100644
index 00000000000..91e55d4d64d
--- /dev/null
+++ b/ydb/library/yql/utils/simd/exec/ya.make
@@ -0,0 +1 @@
+RECURSE(stream_store) \ No newline at end of file
diff --git a/ydb/library/yql/utils/simd/simd.h b/ydb/library/yql/utils/simd/simd.h
index ba94889351a..2c77f462a5e 100644
--- a/ydb/library/yql/utils/simd/simd.h
+++ b/ydb/library/yql/utils/simd/simd.h
@@ -1,5 +1,10 @@
#pragma once
+#include <util/system/cpu_id.h>
+#include <util/system/types.h>
+
+#include <stdlib.h>
+
#include "simd_avx2.h"
#include "simd_sse42.h"
#include "simd_fallback.h"
@@ -8,9 +13,9 @@ namespace NSimd {
template<int RegisterSize, typename TBaseRegister, template<typename> typename TSimd>
struct TSimdTraits {
+ using TRegister = TBaseRegister;
template<typename T>
using TSimd8 = TSimd<T>;
- using TRegister = TBaseRegister;
static constexpr int Size = RegisterSize;
};
diff --git a/ydb/library/yql/utils/simd/simd_avx2.h b/ydb/library/yql/utils/simd/simd_avx2.h
index f6c1979fe50..9a75a4aead3 100644
--- a/ydb/library/yql/utils/simd/simd_avx2.h
+++ b/ydb/library/yql/utils/simd/simd_avx2.h
@@ -3,9 +3,11 @@
#include <cstdint>
#include <immintrin.h>
+#include <util/system/types.h>
+#include <util/stream/output.h>
+#include <util/generic/string.h>
#pragma clang attribute push(__attribute__((target("avx2"))), apply_to=function)
-
namespace NSimd {
namespace NAVX2 {
@@ -73,6 +75,15 @@ struct TBase8: TBase<TSimd8<T>> {
{
}
+    /**
+     * Blends 16-bit lanes of this register with `other` using the compile-time
+     * selector N: per the _mm256_blend_epi16 contract, a set bit takes the lane
+     * from `other`, a clear bit keeps the lane from this register.
+     */
+    template<int N>
+    inline TSimd8<T> Blend16(const TSimd8<T> other) const {
+        // TSimd8 is passed by value, so its register is reached with '.', not '->'.
+        return _mm256_blend_epi16(this->Value, other.Value, N);
+    }
+
+    /**
+     * Byte-wise blend driven by a run-time mask: per the _mm256_blendv_epi8
+     * contract, a byte is taken from `other` when the top bit of the matching
+     * mask byte is set, otherwise from this register.
+     */
+    inline TSimd8<T> BlendVar(const TSimd8<T> other, const TSimd8<T> mask) const {
+        // Pass the raw registers: the intrinsic takes __m256i, not the wrapper type.
+        return _mm256_blendv_epi8(this->Value, other.Value, mask.Value);
+    }
+
friend inline Mask operator==(const TSimd8<T> lhs, const TSimd8<T> rhs) {
return _mm256_cmpeq_epi8(lhs.Value, rhs.Value);
}
@@ -133,6 +144,26 @@ struct TBase8Numeric: TBase8<T> {
return _mm256_loadu_si256(reinterpret_cast<const __m256i *>(values));
}
+    // Loads 32 bytes from `values`; the intrinsic requires a 32-byte aligned address.
+    static inline TSimd8<T> LoadAligned(const T values[32]) {
+        return _mm256_load_si256(reinterpret_cast<const __m256i *>(values));
+    }
+
+    // Non-temporal load of 32 bytes from a 32-byte aligned address.
+    // _mm256_stream_load_si256 takes only the source pointer and returns the
+    // register, so this is a static loader with the same shape as the
+    // fallback's LoadStream().
+    static inline TSimd8<T> LoadStream(const T values[32]) {
+        return _mm256_stream_load_si256(reinterpret_cast<const __m256i *>(values));
+    }
+
+    // Writes the 32 bytes of this register to `dst`; no alignment requirement.
+    inline void Store(T dst[32]) const {
+        _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), this->Value);
+    }
+
+    // Writes the 32 bytes of this register to `dst`, which must be 32-byte aligned.
+    inline void StoreAligned(T dst[32]) const {
+        _mm256_store_si256(reinterpret_cast<__m256i *>(dst), this->Value);
+    }
+
+    // Non-temporal store of the 32 bytes to `dst`, which must be 32-byte aligned.
+    inline void StoreStream(T dst[32]) const {
+        _mm256_stream_si256(reinterpret_cast<__m256i *>(dst), this->Value);
+    }
+
static inline TSimd8<T> Repeat16(
T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7,
T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15
@@ -145,8 +176,27 @@ struct TBase8Numeric: TBase8<T> {
);
}
- inline void Store(T dst[32]) const {
- return _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), this->Value);
+    /**
+     * Writes the register to `out`, reinterpreted as an array of TOut values.
+     * Elements are separated by `delimeter` and the line is finished with `end`.
+     * One-byte TOut values are printed as integers rather than characters.
+     */
+    template<typename TOut>
+    void Log(IOutputStream& out, TString delimeter = " ", TString end = "\n") const {
+        const size_t n = sizeof(this->Value) / sizeof(TOut);
+        TOut buf[n];
+        // Store() takes T*, so cast to T* (a cast to i8* only compiled for T == i8).
+        this->Store(reinterpret_cast<T*>(buf));
+        // n equals the register size in bytes exactly when TOut is one byte wide.
+        const bool printAsInt = (n == sizeof(this->Value));
+        for (size_t i = 0; i < n; i += 1) {
+            if (printAsInt) {
+                out << int(buf[i]);
+            } else {
+                out << buf[i];
+            }
+            if (i + 1 < n) {
+                out << delimeter;
+            }
+        }
+        out << end;
     }
inline TSimd8<T> operator+(const TSimd8<T> other) const {
diff --git a/ydb/library/yql/utils/simd/simd_fallback.h b/ydb/library/yql/utils/simd/simd_fallback.h
index 0bdaf3e303b..b1d7814c428 100644
--- a/ydb/library/yql/utils/simd/simd_fallback.h
+++ b/ydb/library/yql/utils/simd/simd_fallback.h
@@ -3,6 +3,10 @@
#include <cstdint>
#include <immintrin.h>
+#include <util/system/types.h>
+#include <util/stream/output.h>
+#include <util/generic/string.h>
+
namespace NSimd {
namespace NFallback {
@@ -70,6 +74,33 @@ struct TBase8: TBase<TSimd8<T>> {
{
}
+    /**
+     * Scalar counterpart of the SIMD Blend16: the 64-bit value is treated as
+     * four 16-bit lanes. Bit i of N set takes lane i from `other`, bit clear
+     * keeps lane i from this value.
+     */
+    template<int N>
+    inline TSimd8<T> Blend16(const TSimd8<T> other) const {
+        ui64 dst = 0;
+        ui64 laneMask = 0xFFFF;
+        for (size_t lane = 0; lane < 4; lane += 1, laneMask <<= 16) {
+            // TSimd8 is passed by value, so its payload is reached with '.', not '->'.
+            const ui64 source = (N & (1LL << lane)) ? other.Value : this->Value;
+            dst |= source & laneMask;
+        }
+        return TSimd8<T>(dst);
+    }
+
+    /**
+     * Scalar counterpart of the SIMD BlendVar: the 64-bit value is treated as
+     * eight bytes. Byte i is taken from `other` when the top bit of mask byte i
+     * is set, otherwise from this value. This is the selection rule of
+     * _mm_blendv_epi8 / _mm256_blendv_epi8, so all backends agree.
+     */
+    inline TSimd8<T> BlendVar(const TSimd8<T> other, const TSimd8<T> mask) const {
+        ui64 dst = 0;
+        ui64 byteMask = 0xFF;
+        for (size_t byte = 0; byte < 8; byte += 1, byteMask <<= 8) {
+            // Test the most significant bit of the matching mask byte, not bit `byte`.
+            const bool takeOther = ((mask.Value >> (8 * byte + 7)) & 1) != 0;
+            const ui64 source = takeOther ? other.Value : this->Value;
+            dst |= source & byteMask;
+        }
+        return TSimd8<T>(dst);
+    }
+
friend inline Mask operator==(const TSimd8<T> lhs, const TSimd8<T> rhs) {
return lhs.Value == rhs.Value;
}
@@ -134,10 +165,52 @@ struct TBase8Numeric: TBase8<T> {
return TSimd8<T>(*((const ui64*) values));
}
+ // Same method name as the SSE4.2/AVX2 variants; here it simply forwards to Load().
+ static inline TSimd8<T> LoadAligned(const T values[8]) {
+ return Load(values);
+ }
+
+ // Same method name as the SIMD variants' streaming load; forwards to Load().
+ static inline TSimd8<T> LoadStream(const T values[8]) {
+ return Load(values);
+ }
+
inline void Store(T dst[8]) const {
*((ui64*) dst) = this->Value;
}
+ // Same method name as the SSE4.2/AVX2 variants; here it simply forwards to Store().
+ inline void StoreAligned(T dst[8]) const {
+ Store(dst);
+ }
+
+ // Same method name as the SIMD variants' streaming store; forwards to Store().
+ inline void StoreStream(T dst[8]) const {
+ Store(dst);
+ }
+
+    /**
+     * Writes the 8-byte value to `out`, reinterpreted as an array of TOut values.
+     * Elements are separated by `delimeter` and the last one is followed by `end`.
+     * One-byte TOut values are printed as integers rather than characters.
+     */
+    template<typename TOut>
+    void Log(IOutputStream& out, TString delimeter = " ", TString end = "\n") const {
+        const size_t n = sizeof(this->Value) / sizeof(TOut);
+        TOut buf[n];
+        // Store() takes T*, so cast to T* (a cast to i8* only compiled for T == i8).
+        Store(reinterpret_cast<T*>(buf));
+        // n equals the value size in bytes exactly when TOut is one byte wide.
+        const bool printAsInt = (n == sizeof(this->Value));
+        for (size_t i = 0; i < n; i += 1) {
+            if (printAsInt) {
+                out << int(buf[i]);
+            } else {
+                out << buf[i];
+            }
+            if (i + 1 < n) {
+                out << delimeter;
+            } else {
+                out << end;
+            }
+        }
+    }
+
inline TSimd8<T> operator+(const TSimd8<T> other) const {
return this->Value + other.Value;
}
diff --git a/ydb/library/yql/utils/simd/simd_sse42.h b/ydb/library/yql/utils/simd/simd_sse42.h
index 3e6e1a1c9a8..9ec4c850b81 100644
--- a/ydb/library/yql/utils/simd/simd_sse42.h
+++ b/ydb/library/yql/utils/simd/simd_sse42.h
@@ -3,8 +3,11 @@
#include <cstdint>
#include <immintrin.h>
-#pragma clang attribute push(__attribute__((target("sse4.2"))), apply_to=function)
+#include <util/system/types.h>
+#include <util/generic/string.h>
+#include <util/stream/output.h>
+#pragma clang attribute push(__attribute__((target("sse4.2"))), apply_to=function)
namespace NSimd {
namespace NSSE42 {
@@ -67,11 +70,20 @@ struct TBase8: TBase<TSimd8<T>> {
{
}
- inline TBase8(const __m128i _value)
- : TBase<TSimd8<T>>(_value)
+ inline TBase8(const __m128i value)
+ : TBase<TSimd8<T>>(value)
{
}
+    /**
+     * Blends the eight 16-bit lanes of this register with `other` using the
+     * compile-time selector N: per the _mm_blend_epi16 contract, bit i set takes
+     * lane i from `other`, bit clear keeps lane i from this register.
+     */
+    template<int N>
+    inline TSimd8<T> Blend16(const TSimd8<T> other) const {
+        // TSimd8 is passed by value, so its register is reached with '.', not '->'.
+        return _mm_blend_epi16(this->Value, other.Value, N);
+    }
+
+    /**
+     * Byte-wise blend driven by a run-time mask: per the _mm_blendv_epi8
+     * contract, a byte is taken from `other` when the top bit of the matching
+     * mask byte is set, otherwise from this register.
+     */
+    inline TSimd8<T> BlendVar(const TSimd8<T> other, const TSimd8<T> mask) const {
+        // Pass the raw registers: the intrinsic takes __m128i, not the wrapper type.
+        return _mm_blendv_epi8(this->Value, other.Value, mask.Value);
+    }
+
friend inline Mask operator==(const TSimd8<T> lhs, const TSimd8<T> rhs) {
return _mm_cmpeq_epi8(lhs.Value, rhs.Value);
}
@@ -132,6 +144,27 @@ struct TBase8Numeric: TBase8<T> {
return _mm_loadu_si128(reinterpret_cast<const __m128i *>(values));
}
+    // Loads 16 bytes from `values`; the intrinsic requires a 16-byte aligned address.
+    static inline TSimd8<T> LoadAligned(const T values[16]) {
+        return _mm_load_si128(reinterpret_cast<const __m128i *>(values));
+    }
+
+    // Non-temporal load of 16 bytes from a 16-byte aligned address.
+    // _mm_stream_load_si128 takes only the source pointer and returns the
+    // register, so this is a static loader with the same shape as the
+    // fallback's LoadStream().
+    static inline TSimd8<T> LoadStream(const T values[16]) {
+        return _mm_stream_load_si128(reinterpret_cast<const __m128i *>(values));
+    }
+
+    // Writes the 16 bytes of this register to `dst`; no alignment requirement.
+    inline void Store(T dst[16]) const {
+        _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), this->Value);
+    }
+
+    // Writes the 16 bytes of this register to `dst`, which must be 16-byte aligned.
+    inline void StoreAligned(T dst[16]) const {
+        _mm_store_si128(reinterpret_cast<__m128i *>(dst), this->Value);
+    }
+
+    // Non-temporal store of the 16 bytes to `dst`, which must be 16-byte aligned.
+    inline void StoreStream(T dst[16]) const {
+        _mm_stream_si128(reinterpret_cast<__m128i *>(dst), this->Value);
+    }
+
static inline TSimd8<T> Repeat16(
T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7,
T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15
@@ -142,8 +175,32 @@ struct TBase8Numeric: TBase8<T> {
);
}
- inline void Store(T dst[16]) const {
- return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), this->Value);
+
+
+    /**
+     * Writes the register to `out`, reinterpreted as an array of TOut values.
+     * Elements are separated by `delimeter` and the last one is followed by `end`.
+     * One-byte TOut values are printed as integers rather than characters.
+     */
+    template<typename TOut>
+    void Log(IOutputStream& out, TString delimeter = " ", TString end = "\n") const {
+        const size_t n = sizeof(this->Value) / sizeof(TOut);
+        TOut buf[n];
+        // Store() takes T*, so cast to T* (a cast to i8* only compiled for T == i8).
+        Store(reinterpret_cast<T*>(buf));
+        // n equals the register size in bytes exactly when TOut is one byte wide.
+        const bool printAsInt = (n == sizeof(this->Value));
+        for (size_t i = 0; i < n; i += 1) {
+            if (printAsInt) {
+                out << int(buf[i]);
+            } else {
+                out << buf[i];
+            }
+            if (i + 1 < n) {
+                out << delimeter;
+            } else {
+                out << end;
+            }
+        }
     }
inline TSimd8<T> operator+(const TSimd8<T> other) const {
diff --git a/ydb/library/yql/utils/simd/simd_ut.cpp b/ydb/library/yql/utils/simd/simd_ut.cpp
index 5fb5b299573..c1c0cb6840b 100644
--- a/ydb/library/yql/utils/simd/simd_ut.cpp
+++ b/ydb/library/yql/utils/simd/simd_ut.cpp
@@ -1,9 +1,13 @@
#include <library/cpp/testing/unittest/registar.h>
+
+#include <util/generic/ptr.h>
#include <util/system/cpu_id.h>
+#include <util/system/types.h>
+
#include "simd.h"
template<typename TTraits>
-void Reverse(ui8* buf, ui8 *result_buf, int len) {
+void Reverse(ui8* buf, ui8* result_buf, int len) {
using TSimdUI8 = typename TTraits::template TSimd8<ui8>;
int id = 0;
while (id + TTraits::Size <= len) {
@@ -18,6 +22,7 @@ void Reverse(ui8* buf, ui8 *result_buf, int len) {
}
struct TTestFactory {
+
template<typename T>
int Create() const {
return T::Size;
@@ -247,11 +252,14 @@ Y_UNIT_TEST_SUITE(SimdFallback) {
Y_UNIT_TEST(BestTrait) {
TTestFactory x;
if (NX86::HaveAVX2()) {
- UNIT_ASSERT_EQUAL(NSimd::SelectSimdTraits(x), 32);
+ auto y = NSimd::SelectSimdTraits<TTestFactory>(x);
+ UNIT_ASSERT_EQUAL(y, 32);
} else if (NX86::HaveSSE42()) {
- UNIT_ASSERT_EQUAL(NSimd::SelectSimdTraits(x), 16);
+ auto y = NSimd::SelectSimdTraits<TTestFactory>(x);
+ UNIT_ASSERT_EQUAL(y, 16);
} else {
- UNIT_ASSERT_EQUAL(NSimd::SelectSimdTraits(x), 8);
+ auto y = NSimd::SelectSimdTraits<TTestFactory>(x);
+ UNIT_ASSERT_EQUAL(y, 8);
}
}
} \ No newline at end of file
diff --git a/ydb/library/yql/utils/simd/ya.make b/ydb/library/yql/utils/simd/ya.make
index f17981ff0bb..e6a05de3592 100644
--- a/ydb/library/yql/utils/simd/ya.make
+++ b/ydb/library/yql/utils/simd/ya.make
@@ -1,3 +1,19 @@
+EXECTEST()
+
+RUN(
+ stream_store
+)
+
+DEPENDS(
+ ydb/library/yql/utils/simd/exec/stream_store
+)
+
+END()
+
+RECURSE(
+ exec
+)
+
RECURSE_FOR_TESTS(
ut
) \ No newline at end of file